Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-03-28 15:37:33 -04:00
commit 4a396d357d
15 changed files with 212 additions and 117 deletions

View File

@ -955,8 +955,8 @@
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<jvmarg line="${cofoja.jvm.args}"/>
<!-- <jvmarg value="-Xdebug"/> -->
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
<!-- <jvmarg value="-Xdebug"/> -->
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5678"/> -->
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">

View File

@ -177,11 +177,8 @@ public class GATKReport {
*/
public void print(PrintStream out) {
out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size());
for (GATKReportTable table : tables.values()) {
if (table.getNumRows() > 0) {
table.write(out);
}
}
for (GATKReportTable table : tables.values())
table.write(out);
}
public Collection<GATKReportTable> getTables() {

View File

@ -48,9 +48,9 @@ public enum GATKReportDataType {
Boolean("%[Bb]"),
/**
* Used for byte and char value. Will display as a char so use printable values!
* Used for char values. Will display as a char so use printable values!
*/
Byte("%[Cc]"),
Character("%[Cc]"),
/**
* Used for float and double values. Will output a decimal with format %.8f unless otherwise specified.
@ -58,7 +58,7 @@ public enum GATKReportDataType {
Decimal("%.*[EeFf]"),
/**
* Used for int, and long values. Will display the full number by default.
* Used for int, byte, short, and long values. Will display the full number by default.
*/
Integer("%[Dd]"),
@ -97,17 +97,26 @@ public enum GATKReportDataType {
GATKReportDataType value;
if (object instanceof Boolean) {
value = GATKReportDataType.Boolean;
} else if (object instanceof Byte || object instanceof Character) {
value = GATKReportDataType.Byte;
} else if (object instanceof Float || object instanceof Double) {
} else if (object instanceof Character) {
value = GATKReportDataType.Character;
} else if (object instanceof Float ||
object instanceof Double) {
value = GATKReportDataType.Decimal;
} else if (object instanceof Integer || object instanceof Long) {
} else if (object instanceof Integer ||
object instanceof Long ||
object instanceof Short ||
object instanceof Byte ) {
value = GATKReportDataType.Integer;
} else if (object instanceof String) {
value = GATKReportDataType.String;
} else {
value = GATKReportDataType.Unknown;
//throw new ReviewedStingException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation.");
//throw new UserException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation.");
}
return value;
}
@ -140,8 +149,8 @@ public enum GATKReportDataType {
return 0.0D;
case Boolean:
return false;
case Byte:
return (byte) 0;
case Character:
return '0';
case Integer:
return 0L;
case String:
@ -166,16 +175,7 @@ public enum GATKReportDataType {
case Boolean:
case Integer:
return a.toString().equals(b.toString());
case Byte:
// A mess that checks if the bytes and characters contain the same value
if ((a instanceof Character && b instanceof Character) ||
(a instanceof Byte && b instanceof Byte))
return a.toString().equals(b.toString());
else if (a instanceof Character && b instanceof Byte) {
return ((Character) a).charValue() == ((Byte) b).byteValue();
} else if (a instanceof Byte && b instanceof Character) {
return ((Byte) a).byteValue() == ((Character) b).charValue();
}
case Character:
case String:
default:
return a.equals(b);
@ -201,8 +201,8 @@ public enum GATKReportDataType {
return Long.parseLong(str);
case String:
return str;
case Byte:
return (byte) str.toCharArray()[0];
case Character:
return str.toCharArray()[0];
default:
return str;
}
@ -225,7 +225,7 @@ public enum GATKReportDataType {
return "%d";
case String:
return "%s";
case Byte:
case Character:
return "%c";
case Null:
default:

View File

@ -254,7 +254,7 @@ public class GATKReportTable {
* @param dottedColumnValues Period concatenated values.
* @return The first primary key matching the column values or throws an exception.
*/
public Object getPrimaryKey(String dottedColumnValues) {
public Object getPrimaryKeyByData(String dottedColumnValues) {
Object key = findPrimaryKey(dottedColumnValues);
if (key == null)
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
@ -411,9 +411,8 @@ public class GATKReportTable {
if (value == null)
value = "null";
// This code is bs. Why am do I have to conform to bad code
// Below is some ode to convert a string into its appropriate type.
// This is just Roger ranting
// This code below is bs. Why do I have to conform to bad code
// Below is some code to convert a string into its appropriate type.
// If we got a string but the column is not a String type
Object newValue = null;
@ -431,7 +430,7 @@ public class GATKReportTable {
} catch (Exception e) {
}
}
if (column.getDataType().equals(GATKReportDataType.Byte) && ((String) value).length() == 1) {
if (column.getDataType().equals(GATKReportDataType.Character) && ((String) value).length() == 1) {
newValue = ((String) value).charAt(0);
}
@ -816,7 +815,7 @@ public class GATKReportTable {
out.println();
}
out.println();
out.println();
}
public int getNumRows() {
@ -877,8 +876,6 @@ public class GATKReportTable {
this.set(rowKey, columnKey, toAdd.get(rowKey));
//System.out.printf("Putting row with PK: %s \n", rowKey);
} else {
// TODO we should be able to handle combining data by adding, averaging, etc.
this.set(rowKey, columnKey, toAdd.get(rowKey));
System.out.printf("OVERWRITING Row with PK: %s \n", rowKey);

View File

@ -94,8 +94,11 @@ public class RecalibrationReport {
BitSet key = entry.getKey();
RecalDatum otherDatum = entry.getValue();
RecalDatum thisDatum = thisTable.get(key);
thisDatum.increment(otherDatum); // add the two datum objects into 'this'
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
if (thisDatum == null)
thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it!
else
thisDatum.increment(otherDatum); // add the two datum objects into 'this'
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
}
}
}

View File

@ -41,7 +41,8 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model {
/** The default model with the best performance in all cases */
EXACT
EXACT,
POOL
}
protected int N;

View File

@ -152,7 +152,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
@Override
public boolean equals(Object obj) {
return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false;
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts)obj).counts);
}
@Override
@ -202,7 +202,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
public boolean equals(Object obj) {
return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false;
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
}
}

View File

@ -47,9 +47,17 @@ import java.util.Map;
*/
public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
/* public enum Model {
SNP,
INDEL,
BOTH
}
*/
public enum Model {
SNP,
INDEL,
POOLSNP,
POOLINDEL,
BOTH
}
@ -60,7 +68,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
GENOTYPE_GIVEN_ALLELES
}
protected UnifiedArgumentCollection UAC;
protected final UnifiedArgumentCollection UAC;
protected Logger logger;
/**
@ -70,7 +78,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
*/
protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments");
this.UAC = UAC.clone();
this.UAC = UAC;
this.logger = logger;
}

View File

@ -38,7 +38,7 @@ public class UnifiedArgumentCollection {
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
*/
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
/**
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:

View File

@ -222,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, 2*samples.size());
// initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo();

View File

@ -36,14 +36,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.util.*;
public class UnifiedGenotyperEngine {
@ -71,7 +74,7 @@ public class UnifiedGenotyperEngine {
private final VariantAnnotatorEngine annotationEngine;
// the model used for calculating genotypes
private ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>>();
private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
// the model used for calculating p(non-ref)
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
@ -112,22 +115,22 @@ public class UnifiedGenotyperEngine {
// ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
}
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int N) {
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser();
this.samples = new TreeSet<String>(samples);
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
this.UAC = UAC.clone();
this.UAC = UAC;
this.logger = logger;
this.verboseWriter = verboseWriter;
this.annotationEngine = engine;
N = 2 * this.samples.size();
this.N = N;
log10AlleleFrequencyPriorsSNPs = new double[N+1];
log10AlleleFrequencyPriorsIndels = new double[N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
@ -219,7 +222,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
}
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -433,7 +436,7 @@ public class UnifiedGenotyperEngine {
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
return null;
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) {
if ( model.name().toUpperCase().contains("INDEL")) {
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
// regular pileup in this case
@ -463,7 +466,7 @@ public class UnifiedGenotyperEngine {
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
}
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
} else if ( model.name().toUpperCase().contains("SNP") ) {
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
@ -594,21 +597,27 @@ public class UnifiedGenotyperEngine {
return null;
if (vcInput.isSNP()) {
if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP"))
return UAC.GLmodel;
else
// ignore SNP's if user chose INDEL mode
return null;
}
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL))
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
else if ((vcInput.isIndel() || vcInput.isMixed())) {
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
return UAC.GLmodel;
}
}
else {
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL"))
return UAC.GLmodel;
}
}
return null;
@ -630,58 +639,77 @@ public class UnifiedGenotyperEngine {
}
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch( model ) {
case SNP:
return log10AlleleFrequencyPriorsSNPs;
case INDEL:
return log10AlleleFrequencyPriorsIndels;
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
if (model.name().toUpperCase().contains("SNP"))
return log10AlleleFrequencyPriorsSNPs;
else if (model.name().toUpperCase().contains("INDEL"))
return log10AlleleFrequencyPriorsIndels;
else
throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
GenotypePriors priors;
switch ( model ) {
case SNP:
// use flat priors for GLs
priors = new DiploidSNPGenotypePriors();
break;
case INDEL:
// create flat priors for Indels, actual priors will depend on event length to be genotyped
priors = new DiploidIndelGenotypePriors();
break;
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
if( model.name().contains("SNP") )
priors = new DiploidSNPGenotypePriors();
else if( model.name().contains("INDEL") )
priors = new DiploidIndelGenotypePriors();
else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
return priors;
}
protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch( model ) {
case SNP:
return genotypePriorsSNPs;
case INDEL:
return genotypePriorsIndels;
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
if( model.name().contains("SNP") )
return genotypePriorsSNPs;
if( model.name().contains("INDEL") )
return genotypePriorsIndels;
else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
private static Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>();
glcm.put(GenotypeLikelihoodsCalculationModel.Model.SNP, new SNPGenotypeLikelihoodsCalculationModel(UAC, logger));
glcm.put(GenotypeLikelihoodsCalculationModel.Model.INDEL, new IndelGenotypeLikelihoodsCalculationModel(UAC, logger));
private static Map<String,GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<String, GenotypeLikelihoodsCalculationModel>();
// GenotypeLikelihoodsCalculationModel.Model.
List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
for (int i = 0; i < glmClasses.size(); i++) {
Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
//System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName());
try {
Object args[] = new Object[]{UAC,logger};
Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
}
catch (Exception e) {
throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage());
}
}
return glcm;
}
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
AlleleFrequencyCalculationModel afcm;
switch ( UAC.AFmodel ) {
case EXACT:
afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter);
break;
default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
}
List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
return afcm;
for (int i = 0; i < afClasses.size(); i++) {
Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
try {
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
return (AlleleFrequencyCalculationModel)c.newInstance(args);
}
catch (Exception e) {
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
}
}
}
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
}
public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {

View File

@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.math.BigDecimal;
@ -48,6 +49,7 @@ public class MathUtils {
}
public static final double[] log10Cache;
public static final double[] log10FactorialCache;
private static final double[] jacobianLogTable;
private static final double JACOBIAN_LOG_TABLE_STEP = 0.001;
private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / 0.001;
@ -58,11 +60,14 @@ public class MathUtils {
static {
log10Cache = new double[LOG10_CACHE_SIZE];
log10FactorialCache = new double[LOG10_CACHE_SIZE];
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
log10Cache[0] = Double.NEGATIVE_INFINITY;
for (int k = 1; k < LOG10_CACHE_SIZE; k++)
for (int k = 1; k < LOG10_CACHE_SIZE; k++) {
log10Cache[k] = Math.log10(k);
log10FactorialCache[k] = log10FactorialCache[k-1] + log10Cache[k];
}
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
@ -233,6 +238,9 @@ public class MathUtils {
double sum = 0.0;
double maxValue = Utils.findMaxEntry(log10p);
if(maxValue == Double.NEGATIVE_INFINITY)
return sum;
for (int i = start; i < finish; i++) {
sum += Math.pow(10.0, log10p[i] - maxValue);
}
@ -1046,6 +1054,28 @@ public class MathUtils {
}
/**
 * Computes the log10 of the dot product of two vectors that are given in
 * log10 space: in Matlab notation, log10(10.^x' * 10.^y).
 *
 * @param x first vector of log10 values
 * @param y second vector of log10 values; must have the same length as x
 * @return a double representing log10(dotProduct(10.^x, 10.^y))
 */
public static double logDotProduct(double [] x, double[] y) {
    if (x.length != y.length)
        throw new ReviewedStingException("BUG: Vectors of different lengths");
    // In log space each element-wise product becomes an element-wise sum;
    // the final summation over elements is then performed in log space.
    final double[] elementLogProducts = new double[x.length];
    for (int i = 0; i < elementLogProducts.length; i++)
        elementLogProducts[i] = x[i] + y[i];
    return log10sumLog10(elementLogProducts);
}
public static Object getMedian(List<Comparable> list) {
return orderStatisticSearch((int) Math.ceil(list.size() / 2), list);
}
@ -1485,6 +1515,24 @@ public class MathUtils {
return result;
}
/**
 * Element-wise sum of two vectors, using unboxed primitive doubles for
 * efficiency (same contract as the boxed variant above).
 *
 * @param x first addend vector
 * @param y second addend vector; must have the same length as x
 * @return a new vector z of the same length as x and y with z[k] = x[k] + y[k]
 */
public static double[] vectorSum(double[]x, double[] y) {
    if (x.length != y.length)
        throw new ReviewedStingException("BUG: Lengths of x and y must be the same");
    final int length = x.length;
    final double[] sum = new double[length];
    for (int index = 0; index < length; index++)
        sum[index] = x[index] + y[index];
    return sum;
}
public static <E extends Number> Double[] scalarTimesVector(E a, E[] v1) {
Double result[] = new Double[v1.length];

View File

@ -34,21 +34,22 @@ import java.io.IOException;
import java.io.PrintStream;
public class GATKReportUnitTest extends BaseTest {
@Test(enabled = false)
@Test
public void testParse() throws Exception {
String reportPath = validationDataLocation + "exampleGATKReportv1.tbl";
GATKReport report = new GATKReport(reportPath);
Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_0);
Assert.assertEquals(report.getTables().size(), 5);
GATKReportTable countVariants = report.getTable("CountVariants");
//Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1);
Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all");
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000");
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872");
Object countVariantsPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.all");
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520");
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0");
Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06);
GATKReportTable validationReport = report.getTable("ValidationReport");
//Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1);
Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known");
Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN");
Object validationReportPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.novel");
Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN);
}
@DataProvider(name = "rightAlignValues")
@ -117,15 +118,15 @@ public class GATKReportUnitTest extends BaseTest {
report1.addTable("TableName", "Description");
report1.getTable("TableName").addPrimaryKey("id", displayPK);
report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
report1.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c");
report1.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
report1.getTable("TableName").set(1, "colA", "NotNum");
report1.getTable("TableName").set(1, "colB", (byte) 64);
report1.getTable("TableName").set(1, "colB", (char) 64);
report2 = new GATKReport();
report2.addTable("TableName", "Description");
report2.getTable("TableName").addPrimaryKey("id", displayPK);
report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
report2.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c");
report2.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
report2.getTable("TableName").set(2, "colA", "df3");
report2.getTable("TableName").set(2, "colB", 'A');
@ -133,7 +134,7 @@ public class GATKReportUnitTest extends BaseTest {
report3.addTable("TableName", "Description");
report3.getTable("TableName").addPrimaryKey("id", displayPK);
report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
report3.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c");
report3.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
report3.getTable("TableName").set(3, "colA", "df5f");
report3.getTable("TableName").set(3, "colB", 'c');
@ -146,13 +147,13 @@ public class GATKReportUnitTest extends BaseTest {
table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d");
table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E");
table.addColumn("TrueFalse", false, true, "%B");
table.set("12df", "SomeInt", 34);
table.set("12df", "SomeInt", Byte.MAX_VALUE);
table.set("12df", "SomeFloat", 34.0);
table.set("12df", "TrueFalse", true);
table.set("5f", "SomeInt", -1);
table.set("5f", "SomeFloat", 0.000003);
table.set("5f", "SomeInt", Short.MAX_VALUE);
table.set("5f", "SomeFloat", Double.MAX_VALUE);
table.set("5f", "TrueFalse", false);
table.set("RZ", "SomeInt", 904948230958203958L);
table.set("RZ", "SomeInt", Long.MAX_VALUE);
table.set("RZ", "SomeFloat", 535646345.657453464576);
table.set("RZ", "TrueFalse", true);

View File

@ -284,6 +284,18 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
}
@Test
public void testDotProduct() {
    // dot([-5,-3,2], [6,7,8]) = -30 - 21 + 16 = -35
    final Double[] left = new Double[]{-5.0, -3.0, 2.0};
    final Double[] right = new Double[]{6.0, 7.0, 8.0};
    Assert.assertEquals(MathUtils.dotProduct(left, right), -35.0, 1e-3);
    // single-element case: dot([-5], [6]) = -30
    Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0}, new Double[]{6.0}), -30.0, 1e-3);
}
@Test
public void testLogDotProduct() {
    // 10^(−5+6) + 10^(−3+7) + 10^(2+8) is dominated by 10^10, so the result is ~10
    final double[] logA = new double[]{-5.0, -3.0, 2.0};
    final double[] logB = new double[]{6.0, 7.0, 8.0};
    Assert.assertEquals(MathUtils.logDotProduct(logA, logB), 10.0, 1e-3);
    // single-element case: log10(10^(-5+6)) = 1
    Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0}, new double[]{6.0}), 1.0, 1e-3);
}
/**
* Private function used by testNormalizeFromLog10()
*/

View File

@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging {
println(" value (min,target,max) table key metric")
for (validation <- evalSpec.validations) {
val table = report.getTable(validation.table)
val key = table.getPrimaryKey(validation.key)
val key = table.getPrimaryKeyByData(validation.key)
val value = String.valueOf(table.get(key, validation.metric))
val inRange = if (value == null) false else validation.inRange(value)
val flag = if (!inRange) "*" else " "