Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
4a396d357d
|
|
@ -955,8 +955,8 @@
|
|||
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
|
||||
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
|
||||
<jvmarg line="${cofoja.jvm.args}"/>
|
||||
<!-- <jvmarg value="-Xdebug"/> -->
|
||||
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
|
||||
<!-- <jvmarg value="-Xdebug"/> -->
|
||||
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5678"/> -->
|
||||
|
||||
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
|
||||
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">
|
||||
|
|
|
|||
|
|
@ -177,11 +177,8 @@ public class GATKReport {
|
|||
*/
|
||||
public void print(PrintStream out) {
|
||||
out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size());
|
||||
for (GATKReportTable table : tables.values()) {
|
||||
if (table.getNumRows() > 0) {
|
||||
table.write(out);
|
||||
}
|
||||
}
|
||||
for (GATKReportTable table : tables.values())
|
||||
table.write(out);
|
||||
}
|
||||
|
||||
public Collection<GATKReportTable> getTables() {
|
||||
|
|
|
|||
|
|
@ -48,9 +48,9 @@ public enum GATKReportDataType {
|
|||
Boolean("%[Bb]"),
|
||||
|
||||
/**
|
||||
* Used for byte and char value. Will display as a char so use printable values!
|
||||
* Used for char values. Will display as a char so use printable values!
|
||||
*/
|
||||
Byte("%[Cc]"),
|
||||
Character("%[Cc]"),
|
||||
|
||||
/**
|
||||
* Used for float and double values. Will output a decimal with format %.8f unless otherwise specified.
|
||||
|
|
@ -58,7 +58,7 @@ public enum GATKReportDataType {
|
|||
Decimal("%.*[EeFf]"),
|
||||
|
||||
/**
|
||||
* Used for int, and long values. Will display the full number by default.
|
||||
* Used for int, byte, short, and long values. Will display the full number by default.
|
||||
*/
|
||||
Integer("%[Dd]"),
|
||||
|
||||
|
|
@ -97,17 +97,26 @@ public enum GATKReportDataType {
|
|||
GATKReportDataType value;
|
||||
if (object instanceof Boolean) {
|
||||
value = GATKReportDataType.Boolean;
|
||||
} else if (object instanceof Byte || object instanceof Character) {
|
||||
value = GATKReportDataType.Byte;
|
||||
} else if (object instanceof Float || object instanceof Double) {
|
||||
|
||||
} else if (object instanceof Character) {
|
||||
value = GATKReportDataType.Character;
|
||||
|
||||
} else if (object instanceof Float ||
|
||||
object instanceof Double) {
|
||||
value = GATKReportDataType.Decimal;
|
||||
} else if (object instanceof Integer || object instanceof Long) {
|
||||
|
||||
} else if (object instanceof Integer ||
|
||||
object instanceof Long ||
|
||||
object instanceof Short ||
|
||||
object instanceof Byte ) {
|
||||
value = GATKReportDataType.Integer;
|
||||
|
||||
} else if (object instanceof String) {
|
||||
value = GATKReportDataType.String;
|
||||
|
||||
} else {
|
||||
value = GATKReportDataType.Unknown;
|
||||
//throw new ReviewedStingException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation.");
|
||||
//throw new UserException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation.");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
|
@ -140,8 +149,8 @@ public enum GATKReportDataType {
|
|||
return 0.0D;
|
||||
case Boolean:
|
||||
return false;
|
||||
case Byte:
|
||||
return (byte) 0;
|
||||
case Character:
|
||||
return '0';
|
||||
case Integer:
|
||||
return 0L;
|
||||
case String:
|
||||
|
|
@ -166,16 +175,7 @@ public enum GATKReportDataType {
|
|||
case Boolean:
|
||||
case Integer:
|
||||
return a.toString().equals(b.toString());
|
||||
case Byte:
|
||||
// A mess that checks if the bytes and characters contain the same value
|
||||
if ((a instanceof Character && b instanceof Character) ||
|
||||
(a instanceof Byte && b instanceof Byte))
|
||||
return a.toString().equals(b.toString());
|
||||
else if (a instanceof Character && b instanceof Byte) {
|
||||
return ((Character) a).charValue() == ((Byte) b).byteValue();
|
||||
} else if (a instanceof Byte && b instanceof Character) {
|
||||
return ((Byte) a).byteValue() == ((Character) b).charValue();
|
||||
}
|
||||
case Character:
|
||||
case String:
|
||||
default:
|
||||
return a.equals(b);
|
||||
|
|
@ -201,8 +201,8 @@ public enum GATKReportDataType {
|
|||
return Long.parseLong(str);
|
||||
case String:
|
||||
return str;
|
||||
case Byte:
|
||||
return (byte) str.toCharArray()[0];
|
||||
case Character:
|
||||
return str.toCharArray()[0];
|
||||
default:
|
||||
return str;
|
||||
}
|
||||
|
|
@ -225,7 +225,7 @@ public enum GATKReportDataType {
|
|||
return "%d";
|
||||
case String:
|
||||
return "%s";
|
||||
case Byte:
|
||||
case Character:
|
||||
return "%c";
|
||||
case Null:
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -254,7 +254,7 @@ public class GATKReportTable {
|
|||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return The first primary key matching the column values or throws an exception.
|
||||
*/
|
||||
public Object getPrimaryKey(String dottedColumnValues) {
|
||||
public Object getPrimaryKeyByData(String dottedColumnValues) {
|
||||
Object key = findPrimaryKey(dottedColumnValues);
|
||||
if (key == null)
|
||||
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
|
||||
|
|
@ -411,9 +411,8 @@ public class GATKReportTable {
|
|||
if (value == null)
|
||||
value = "null";
|
||||
|
||||
// This code is bs. Why am do I have to conform to bad code
|
||||
// Below is some ode to convert a string into its appropriate type.
|
||||
// This is just Roger ranting
|
||||
// This code below is bs. Why am do I have to conform to bad code
|
||||
// Below is some code to convert a string into its appropriate type.
|
||||
|
||||
// If we got a string but the column is not a String type
|
||||
Object newValue = null;
|
||||
|
|
@ -431,7 +430,7 @@ public class GATKReportTable {
|
|||
} catch (Exception e) {
|
||||
}
|
||||
}
|
||||
if (column.getDataType().equals(GATKReportDataType.Byte) && ((String) value).length() == 1) {
|
||||
if (column.getDataType().equals(GATKReportDataType.Character) && ((String) value).length() == 1) {
|
||||
newValue = ((String) value).charAt(0);
|
||||
|
||||
}
|
||||
|
|
@ -816,7 +815,7 @@ public class GATKReportTable {
|
|||
out.println();
|
||||
}
|
||||
|
||||
out.println();
|
||||
out.println();
|
||||
}
|
||||
|
||||
public int getNumRows() {
|
||||
|
|
@ -877,8 +876,6 @@ public class GATKReportTable {
|
|||
this.set(rowKey, columnKey, toAdd.get(rowKey));
|
||||
//System.out.printf("Putting row with PK: %s \n", rowKey);
|
||||
} else {
|
||||
|
||||
// TODO we should be able to handle combining data by adding, averaging, etc.
|
||||
this.set(rowKey, columnKey, toAdd.get(rowKey));
|
||||
|
||||
System.out.printf("OVERWRITING Row with PK: %s \n", rowKey);
|
||||
|
|
|
|||
|
|
@ -94,8 +94,11 @@ public class RecalibrationReport {
|
|||
BitSet key = entry.getKey();
|
||||
RecalDatum otherDatum = entry.getValue();
|
||||
RecalDatum thisDatum = thisTable.get(key);
|
||||
thisDatum.increment(otherDatum); // add the two datum objects into 'this'
|
||||
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
|
||||
if (thisDatum == null)
|
||||
thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it!
|
||||
else
|
||||
thisDatum.increment(otherDatum); // add the two datum objects into 'this'
|
||||
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,7 +41,8 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
|
||||
public enum Model {
|
||||
/** The default model with the best performance in all cases */
|
||||
EXACT
|
||||
EXACT,
|
||||
POOL
|
||||
}
|
||||
|
||||
protected int N;
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false;
|
||||
return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts)obj).counts);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -202,7 +202,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false;
|
||||
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -47,9 +47,17 @@ import java.util.Map;
|
|||
*/
|
||||
public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
||||
|
||||
/* public enum Model {
|
||||
SNP,
|
||||
INDEL,
|
||||
BOTH
|
||||
}
|
||||
*/
|
||||
public enum Model {
|
||||
SNP,
|
||||
INDEL,
|
||||
POOLSNP,
|
||||
POOLINDEL,
|
||||
BOTH
|
||||
}
|
||||
|
||||
|
|
@ -60,7 +68,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
GENOTYPE_GIVEN_ALLELES
|
||||
}
|
||||
|
||||
protected UnifiedArgumentCollection UAC;
|
||||
protected final UnifiedArgumentCollection UAC;
|
||||
protected Logger logger;
|
||||
|
||||
/**
|
||||
|
|
@ -70,7 +78,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
*/
|
||||
protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments");
|
||||
this.UAC = UAC.clone();
|
||||
this.UAC = UAC;
|
||||
this.logger = logger;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class UnifiedArgumentCollection {
|
|||
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
|
||||
*/
|
||||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
|
||||
public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
|
||||
protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
|
||||
|
||||
/**
|
||||
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:
|
||||
|
|
|
|||
|
|
@ -222,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
|
||||
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, 2*samples.size());
|
||||
|
||||
// initialize the header
|
||||
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
|
||||
|
|
|
|||
|
|
@ -36,14 +36,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.*;
|
||||
|
||||
public class UnifiedGenotyperEngine {
|
||||
|
|
@ -71,7 +74,7 @@ public class UnifiedGenotyperEngine {
|
|||
private final VariantAnnotatorEngine annotationEngine;
|
||||
|
||||
// the model used for calculating genotypes
|
||||
private ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>>();
|
||||
private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
|
||||
|
||||
// the model used for calculating p(non-ref)
|
||||
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
|
||||
|
|
@ -112,22 +115,22 @@ public class UnifiedGenotyperEngine {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Requires({"toolkit != null", "UAC != null"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()));
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
|
||||
}
|
||||
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) {
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int N) {
|
||||
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
|
||||
genomeLocParser = toolkit.getGenomeLocParser();
|
||||
this.samples = new TreeSet<String>(samples);
|
||||
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
|
||||
this.UAC = UAC.clone();
|
||||
this.UAC = UAC;
|
||||
|
||||
this.logger = logger;
|
||||
this.verboseWriter = verboseWriter;
|
||||
this.annotationEngine = engine;
|
||||
|
||||
N = 2 * this.samples.size();
|
||||
this.N = N;
|
||||
log10AlleleFrequencyPriorsSNPs = new double[N+1];
|
||||
log10AlleleFrequencyPriorsIndels = new double[N+1];
|
||||
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
|
||||
|
|
@ -219,7 +222,7 @@ public class UnifiedGenotyperEngine {
|
|||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||
}
|
||||
|
||||
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
|
||||
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
|
||||
}
|
||||
|
||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||
|
|
@ -433,7 +436,7 @@ public class UnifiedGenotyperEngine {
|
|||
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
|
||||
return null;
|
||||
|
||||
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) {
|
||||
if ( model.name().toUpperCase().contains("INDEL")) {
|
||||
|
||||
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||
// regular pileup in this case
|
||||
|
|
@ -463,7 +466,7 @@ public class UnifiedGenotyperEngine {
|
|||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||
}
|
||||
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) {
|
||||
} else if ( model.name().toUpperCase().contains("SNP") ) {
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
|
||||
|
|
@ -594,21 +597,27 @@ public class UnifiedGenotyperEngine {
|
|||
return null;
|
||||
|
||||
if (vcInput.isSNP()) {
|
||||
if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP))
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP"))
|
||||
return UAC.GLmodel;
|
||||
else
|
||||
// ignore SNP's if user chose INDEL mode
|
||||
return null;
|
||||
}
|
||||
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL))
|
||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
else if ((vcInput.isIndel() || vcInput.isMixed())) {
|
||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
||||
return UAC.GLmodel;
|
||||
}
|
||||
}
|
||||
else {
|
||||
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
|
||||
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)
|
||||
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)
|
||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
||||
return UAC.GLmodel;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
|
@ -630,58 +639,77 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
switch( model ) {
|
||||
case SNP:
|
||||
return log10AlleleFrequencyPriorsSNPs;
|
||||
case INDEL:
|
||||
return log10AlleleFrequencyPriorsIndels;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
if (model.name().toUpperCase().contains("SNP"))
|
||||
return log10AlleleFrequencyPriorsSNPs;
|
||||
else if (model.name().toUpperCase().contains("INDEL"))
|
||||
return log10AlleleFrequencyPriorsIndels;
|
||||
else
|
||||
throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
|
||||
}
|
||||
|
||||
private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
GenotypePriors priors;
|
||||
switch ( model ) {
|
||||
case SNP:
|
||||
// use flat priors for GLs
|
||||
priors = new DiploidSNPGenotypePriors();
|
||||
break;
|
||||
case INDEL:
|
||||
// create flat priors for Indels, actual priors will depend on event length to be genotyped
|
||||
priors = new DiploidIndelGenotypePriors();
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
if( model.name().contains("SNP") )
|
||||
priors = new DiploidSNPGenotypePriors();
|
||||
else if( model.name().contains("INDEL") )
|
||||
priors = new DiploidIndelGenotypePriors();
|
||||
else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
|
||||
return priors;
|
||||
}
|
||||
|
||||
protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
switch( model ) {
|
||||
case SNP:
|
||||
return genotypePriorsSNPs;
|
||||
case INDEL:
|
||||
return genotypePriorsIndels;
|
||||
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
if( model.name().contains("SNP") )
|
||||
return genotypePriorsSNPs;
|
||||
if( model.name().contains("INDEL") )
|
||||
return genotypePriorsIndels;
|
||||
else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||
}
|
||||
|
||||
private static Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
||||
Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>();
|
||||
glcm.put(GenotypeLikelihoodsCalculationModel.Model.SNP, new SNPGenotypeLikelihoodsCalculationModel(UAC, logger));
|
||||
glcm.put(GenotypeLikelihoodsCalculationModel.Model.INDEL, new IndelGenotypeLikelihoodsCalculationModel(UAC, logger));
|
||||
private static Map<String,GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
||||
|
||||
|
||||
Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<String, GenotypeLikelihoodsCalculationModel>();
|
||||
// GenotypeLikelihoodsCalculationModel.Model.
|
||||
List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
|
||||
|
||||
for (int i = 0; i < glmClasses.size(); i++) {
|
||||
Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
|
||||
String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
|
||||
//System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName());
|
||||
try {
|
||||
Object args[] = new Object[]{UAC,logger};
|
||||
Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
|
||||
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
return glcm;
|
||||
}
|
||||
|
||||
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
|
||||
AlleleFrequencyCalculationModel afcm;
|
||||
switch ( UAC.AFmodel ) {
|
||||
case EXACT:
|
||||
afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter);
|
||||
break;
|
||||
default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
|
||||
}
|
||||
List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
|
||||
|
||||
return afcm;
|
||||
for (int i = 0; i < afClasses.size(); i++) {
|
||||
Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
|
||||
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
|
||||
if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
|
||||
try {
|
||||
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
|
||||
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
|
||||
|
||||
return (AlleleFrequencyCalculationModel)c.newInstance(args);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
|
||||
}
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
|
||||
}
|
||||
|
||||
public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
|
@ -48,6 +49,7 @@ public class MathUtils {
|
|||
}
|
||||
|
||||
public static final double[] log10Cache;
|
||||
public static final double[] log10FactorialCache;
|
||||
private static final double[] jacobianLogTable;
|
||||
private static final double JACOBIAN_LOG_TABLE_STEP = 0.001;
|
||||
private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / 0.001;
|
||||
|
|
@ -58,11 +60,14 @@ public class MathUtils {
|
|||
|
||||
static {
|
||||
log10Cache = new double[LOG10_CACHE_SIZE];
|
||||
log10FactorialCache = new double[LOG10_CACHE_SIZE];
|
||||
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
|
||||
|
||||
log10Cache[0] = Double.NEGATIVE_INFINITY;
|
||||
for (int k = 1; k < LOG10_CACHE_SIZE; k++)
|
||||
for (int k = 1; k < LOG10_CACHE_SIZE; k++) {
|
||||
log10Cache[k] = Math.log10(k);
|
||||
log10FactorialCache[k] = log10FactorialCache[k-1] + log10Cache[k];
|
||||
}
|
||||
|
||||
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
|
||||
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
|
||||
|
|
@ -233,6 +238,9 @@ public class MathUtils {
|
|||
double sum = 0.0;
|
||||
|
||||
double maxValue = Utils.findMaxEntry(log10p);
|
||||
if(maxValue == Double.NEGATIVE_INFINITY)
|
||||
return sum;
|
||||
|
||||
for (int i = start; i < finish; i++) {
|
||||
sum += Math.pow(10.0, log10p[i] - maxValue);
|
||||
}
|
||||
|
|
@ -1046,6 +1054,28 @@ public class MathUtils {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* Given two log-probability vectors, compute log of vector product of them:
|
||||
* in Matlab notation, return log10(10.*x'*10.^y)
|
||||
* @param x vector 1
|
||||
* @param y vector 2
|
||||
* @return a double representing log (dotProd(10.^x,10.^y)
|
||||
*/
|
||||
public static double logDotProduct(double [] x, double[] y) {
|
||||
if (x.length != y.length)
|
||||
throw new ReviewedStingException("BUG: Vectors of different lengths");
|
||||
|
||||
double tmpVec[] = new double[x.length];
|
||||
|
||||
for (int k=0; k < tmpVec.length; k++ ) {
|
||||
tmpVec[k] = x[k]+y[k];
|
||||
}
|
||||
|
||||
return log10sumLog10(tmpVec);
|
||||
|
||||
|
||||
|
||||
}
|
||||
public static Object getMedian(List<Comparable> list) {
|
||||
return orderStatisticSearch((int) Math.ceil(list.size() / 2), list);
|
||||
}
|
||||
|
|
@ -1485,6 +1515,24 @@ public class MathUtils {
|
|||
return result;
|
||||
}
|
||||
|
||||
/** Same routine, unboxed types for efficiency
|
||||
*
|
||||
* @param x
|
||||
* @param y
|
||||
* @return Vector of same length as x and y so that z[k] = x[k]+y[k]
|
||||
*/
|
||||
public static double[] vectorSum(double[]x, double[] y) {
|
||||
if (x.length != y.length)
|
||||
throw new ReviewedStingException("BUG: Lengths of x and y must be the same");
|
||||
|
||||
double[] result = new double[x.length];
|
||||
for (int k=0; k <x.length; k++)
|
||||
result[k] = x[k]+y[k];
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
public static <E extends Number> Double[] scalarTimesVector(E a, E[] v1) {
|
||||
|
||||
Double result[] = new Double[v1.length];
|
||||
|
|
|
|||
|
|
@ -34,21 +34,22 @@ import java.io.IOException;
|
|||
import java.io.PrintStream;
|
||||
|
||||
public class GATKReportUnitTest extends BaseTest {
|
||||
@Test(enabled = false)
|
||||
@Test
|
||||
public void testParse() throws Exception {
|
||||
String reportPath = validationDataLocation + "exampleGATKReportv1.tbl";
|
||||
GATKReport report = new GATKReport(reportPath);
|
||||
Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_0);
|
||||
Assert.assertEquals(report.getTables().size(), 5);
|
||||
|
||||
GATKReportTable countVariants = report.getTable("CountVariants");
|
||||
//Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1);
|
||||
Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all");
|
||||
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000");
|
||||
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872");
|
||||
Object countVariantsPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.all");
|
||||
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520");
|
||||
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0");
|
||||
Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06);
|
||||
|
||||
GATKReportTable validationReport = report.getTable("ValidationReport");
|
||||
//Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1);
|
||||
Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known");
|
||||
Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN");
|
||||
Object validationReportPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.novel");
|
||||
Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN);
|
||||
}
|
||||
|
||||
@DataProvider(name = "rightAlignValues")
|
||||
|
|
@ -117,15 +118,15 @@ public class GATKReportUnitTest extends BaseTest {
|
|||
report1.addTable("TableName", "Description");
|
||||
report1.getTable("TableName").addPrimaryKey("id", displayPK);
|
||||
report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
|
||||
report1.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c");
|
||||
report1.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
|
||||
report1.getTable("TableName").set(1, "colA", "NotNum");
|
||||
report1.getTable("TableName").set(1, "colB", (byte) 64);
|
||||
report1.getTable("TableName").set(1, "colB", (char) 64);
|
||||
|
||||
report2 = new GATKReport();
|
||||
report2.addTable("TableName", "Description");
|
||||
report2.getTable("TableName").addPrimaryKey("id", displayPK);
|
||||
report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
|
||||
report2.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c");
|
||||
report2.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
|
||||
report2.getTable("TableName").set(2, "colA", "df3");
|
||||
report2.getTable("TableName").set(2, "colB", 'A');
|
||||
|
||||
|
|
@ -133,7 +134,7 @@ public class GATKReportUnitTest extends BaseTest {
|
|||
report3.addTable("TableName", "Description");
|
||||
report3.getTable("TableName").addPrimaryKey("id", displayPK);
|
||||
report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
|
||||
report3.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c");
|
||||
report3.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
|
||||
report3.getTable("TableName").set(3, "colA", "df5f");
|
||||
report3.getTable("TableName").set(3, "colB", 'c');
|
||||
|
||||
|
|
@ -146,13 +147,13 @@ public class GATKReportUnitTest extends BaseTest {
|
|||
table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d");
|
||||
table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E");
|
||||
table.addColumn("TrueFalse", false, true, "%B");
|
||||
table.set("12df", "SomeInt", 34);
|
||||
table.set("12df", "SomeInt", Byte.MAX_VALUE);
|
||||
table.set("12df", "SomeFloat", 34.0);
|
||||
table.set("12df", "TrueFalse", true);
|
||||
table.set("5f", "SomeInt", -1);
|
||||
table.set("5f", "SomeFloat", 0.000003);
|
||||
table.set("5f", "SomeInt", Short.MAX_VALUE);
|
||||
table.set("5f", "SomeFloat", Double.MAX_VALUE);
|
||||
table.set("5f", "TrueFalse", false);
|
||||
table.set("RZ", "SomeInt", 904948230958203958L);
|
||||
table.set("RZ", "SomeInt", Long.MAX_VALUE);
|
||||
table.set("RZ", "SomeFloat", 535646345.657453464576);
|
||||
table.set("RZ", "TrueFalse", true);
|
||||
|
||||
|
|
|
|||
|
|
@ -284,6 +284,18 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDotProduct() {
|
||||
Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0,-3.0,2.0}, new Double[]{6.0,7.0,8.0}),-35.0,1e-3);
|
||||
Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0}, new Double[]{6.0}),-30.0,1e-3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLogDotProduct() {
|
||||
Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0,-3.0,2.0}, new double[]{6.0,7.0,8.0}),10.0,1e-3);
|
||||
Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0}, new double[]{6.0}),1.0,1e-3);
|
||||
}
|
||||
|
||||
/**
|
||||
* Private function used by testNormalizeFromLog10()
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging {
|
|||
println(" value (min,target,max) table key metric")
|
||||
for (validation <- evalSpec.validations) {
|
||||
val table = report.getTable(validation.table)
|
||||
val key = table.getPrimaryKey(validation.key)
|
||||
val key = table.getPrimaryKeyByData(validation.key)
|
||||
val value = String.valueOf(table.get(key, validation.metric))
|
||||
val inRange = if (value == null) false else validation.inRange(value)
|
||||
val flag = if (!inRange) "*" else " "
|
||||
|
|
|
|||
Loading…
Reference in New Issue