Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2012-03-28 15:37:33 -04:00
commit 4a396d357d
15 changed files with 212 additions and 117 deletions

View File

@ -955,8 +955,8 @@
<jvmarg value="-Dpipeline.run=${pipeline.run}" /> <jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" /> <jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<jvmarg line="${cofoja.jvm.args}"/> <jvmarg line="${cofoja.jvm.args}"/>
<!-- <jvmarg value="-Xdebug"/> --> <!-- <jvmarg value="-Xdebug"/> -->
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> --> <!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5678"/> -->
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/> <classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false"> <classfileset dir="${java.private.test.classes}" erroronmissingdir="false">

View File

@ -177,11 +177,8 @@ public class GATKReport {
*/ */
public void print(PrintStream out) { public void print(PrintStream out) {
out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size()); out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size());
for (GATKReportTable table : tables.values()) { for (GATKReportTable table : tables.values())
if (table.getNumRows() > 0) { table.write(out);
table.write(out);
}
}
} }
public Collection<GATKReportTable> getTables() { public Collection<GATKReportTable> getTables() {

View File

@ -48,9 +48,9 @@ public enum GATKReportDataType {
Boolean("%[Bb]"), Boolean("%[Bb]"),
/** /**
* Used for byte and char value. Will display as a char so use printable values! * Used for char values. Will display as a char so use printable values!
*/ */
Byte("%[Cc]"), Character("%[Cc]"),
/** /**
* Used for float and double values. Will output a decimal with format %.8f unless otherwise specified. * Used for float and double values. Will output a decimal with format %.8f unless otherwise specified.
@ -58,7 +58,7 @@ public enum GATKReportDataType {
Decimal("%.*[EeFf]"), Decimal("%.*[EeFf]"),
/** /**
* Used for int, and long values. Will display the full number by default. * Used for int, byte, short, and long values. Will display the full number by default.
*/ */
Integer("%[Dd]"), Integer("%[Dd]"),
@ -97,17 +97,26 @@ public enum GATKReportDataType {
GATKReportDataType value; GATKReportDataType value;
if (object instanceof Boolean) { if (object instanceof Boolean) {
value = GATKReportDataType.Boolean; value = GATKReportDataType.Boolean;
} else if (object instanceof Byte || object instanceof Character) {
value = GATKReportDataType.Byte; } else if (object instanceof Character) {
} else if (object instanceof Float || object instanceof Double) { value = GATKReportDataType.Character;
} else if (object instanceof Float ||
object instanceof Double) {
value = GATKReportDataType.Decimal; value = GATKReportDataType.Decimal;
} else if (object instanceof Integer || object instanceof Long) {
} else if (object instanceof Integer ||
object instanceof Long ||
object instanceof Short ||
object instanceof Byte ) {
value = GATKReportDataType.Integer; value = GATKReportDataType.Integer;
} else if (object instanceof String) { } else if (object instanceof String) {
value = GATKReportDataType.String; value = GATKReportDataType.String;
} else { } else {
value = GATKReportDataType.Unknown; value = GATKReportDataType.Unknown;
//throw new ReviewedStingException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation."); //throw new UserException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation.");
} }
return value; return value;
} }
@ -140,8 +149,8 @@ public enum GATKReportDataType {
return 0.0D; return 0.0D;
case Boolean: case Boolean:
return false; return false;
case Byte: case Character:
return (byte) 0; return '0';
case Integer: case Integer:
return 0L; return 0L;
case String: case String:
@ -166,16 +175,7 @@ public enum GATKReportDataType {
case Boolean: case Boolean:
case Integer: case Integer:
return a.toString().equals(b.toString()); return a.toString().equals(b.toString());
case Byte: case Character:
// A mess that checks if the bytes and characters contain the same value
if ((a instanceof Character && b instanceof Character) ||
(a instanceof Byte && b instanceof Byte))
return a.toString().equals(b.toString());
else if (a instanceof Character && b instanceof Byte) {
return ((Character) a).charValue() == ((Byte) b).byteValue();
} else if (a instanceof Byte && b instanceof Character) {
return ((Byte) a).byteValue() == ((Character) b).charValue();
}
case String: case String:
default: default:
return a.equals(b); return a.equals(b);
@ -201,8 +201,8 @@ public enum GATKReportDataType {
return Long.parseLong(str); return Long.parseLong(str);
case String: case String:
return str; return str;
case Byte: case Character:
return (byte) str.toCharArray()[0]; return str.toCharArray()[0];
default: default:
return str; return str;
} }
@ -225,7 +225,7 @@ public enum GATKReportDataType {
return "%d"; return "%d";
case String: case String:
return "%s"; return "%s";
case Byte: case Character:
return "%c"; return "%c";
case Null: case Null:
default: default:

View File

@ -254,7 +254,7 @@ public class GATKReportTable {
* @param dottedColumnValues Period concatenated values. * @param dottedColumnValues Period concatenated values.
* @return The first primary key matching the column values or throws an exception. * @return The first primary key matching the column values or throws an exception.
*/ */
public Object getPrimaryKey(String dottedColumnValues) { public Object getPrimaryKeyByData(String dottedColumnValues) {
Object key = findPrimaryKey(dottedColumnValues); Object key = findPrimaryKey(dottedColumnValues);
if (key == null) if (key == null)
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues); throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
@ -411,9 +411,8 @@ public class GATKReportTable {
if (value == null) if (value == null)
value = "null"; value = "null";
// This code is bs. Why am do I have to conform to bad code // This code below is bs. Why am do I have to conform to bad code
// Below is some ode to convert a string into its appropriate type. // Below is some code to convert a string into its appropriate type.
// This is just Roger ranting
// If we got a string but the column is not a String type // If we got a string but the column is not a String type
Object newValue = null; Object newValue = null;
@ -431,7 +430,7 @@ public class GATKReportTable {
} catch (Exception e) { } catch (Exception e) {
} }
} }
if (column.getDataType().equals(GATKReportDataType.Byte) && ((String) value).length() == 1) { if (column.getDataType().equals(GATKReportDataType.Character) && ((String) value).length() == 1) {
newValue = ((String) value).charAt(0); newValue = ((String) value).charAt(0);
} }
@ -816,7 +815,7 @@ public class GATKReportTable {
out.println(); out.println();
} }
out.println(); out.println();
} }
public int getNumRows() { public int getNumRows() {
@ -877,8 +876,6 @@ public class GATKReportTable {
this.set(rowKey, columnKey, toAdd.get(rowKey)); this.set(rowKey, columnKey, toAdd.get(rowKey));
//System.out.printf("Putting row with PK: %s \n", rowKey); //System.out.printf("Putting row with PK: %s \n", rowKey);
} else { } else {
// TODO we should be able to handle combining data by adding, averaging, etc.
this.set(rowKey, columnKey, toAdd.get(rowKey)); this.set(rowKey, columnKey, toAdd.get(rowKey));
System.out.printf("OVERWRITING Row with PK: %s \n", rowKey); System.out.printf("OVERWRITING Row with PK: %s \n", rowKey);

View File

@ -94,8 +94,11 @@ public class RecalibrationReport {
BitSet key = entry.getKey(); BitSet key = entry.getKey();
RecalDatum otherDatum = entry.getValue(); RecalDatum otherDatum = entry.getValue();
RecalDatum thisDatum = thisTable.get(key); RecalDatum thisDatum = thisTable.get(key);
thisDatum.increment(otherDatum); // add the two datum objects into 'this' if (thisDatum == null)
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it!
else
thisDatum.increment(otherDatum); // add the two datum objects into 'this'
thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it
} }
} }
} }

View File

@ -41,7 +41,8 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
public enum Model { public enum Model {
/** The default model with the best performance in all cases */ /** The default model with the best performance in all cases */
EXACT EXACT,
POOL
} }
protected int N; protected int N;

View File

@ -152,7 +152,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false; return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts)obj).counts);
} }
@Override @Override
@ -202,7 +202,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
public boolean equals(Object obj) { public boolean equals(Object obj) {
return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false; return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
} }
} }

View File

@ -47,9 +47,17 @@ import java.util.Map;
*/ */
public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
/* public enum Model {
SNP,
INDEL,
BOTH
}
*/
public enum Model { public enum Model {
SNP, SNP,
INDEL, INDEL,
POOLSNP,
POOLINDEL,
BOTH BOTH
} }
@ -60,7 +68,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
GENOTYPE_GIVEN_ALLELES GENOTYPE_GIVEN_ALLELES
} }
protected UnifiedArgumentCollection UAC; protected final UnifiedArgumentCollection UAC;
protected Logger logger; protected Logger logger;
/** /**
@ -70,7 +78,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
*/ */
protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments"); if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments");
this.UAC = UAC.clone(); this.UAC = UAC;
this.logger = logger; this.logger = logger;
} }

View File

@ -38,7 +38,7 @@ public class UnifiedArgumentCollection {
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus. * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
*/ */
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false) @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false)
public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT;
/** /**
* The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are: * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are:

View File

@ -222,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP"); verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, 2*samples.size());
// initialize the header // initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo(); Set<VCFHeaderLine> headerInfo = getHeaderInfo();

View File

@ -36,14 +36,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream; import java.io.PrintStream;
import java.lang.reflect.Constructor;
import java.util.*; import java.util.*;
public class UnifiedGenotyperEngine { public class UnifiedGenotyperEngine {
@ -71,7 +74,7 @@ public class UnifiedGenotyperEngine {
private final VariantAnnotatorEngine annotationEngine; private final VariantAnnotatorEngine annotationEngine;
// the model used for calculating genotypes // the model used for calculating genotypes
private ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>>(); private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
// the model used for calculating p(non-ref) // the model used for calculating p(non-ref)
private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>(); private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();
@ -112,22 +115,22 @@ public class UnifiedGenotyperEngine {
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"}) @Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader())); this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
} }
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples) { public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set<String> samples, int N) {
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser(); genomeLocParser = toolkit.getGenomeLocParser();
this.samples = new TreeSet<String>(samples); this.samples = new TreeSet<String>(samples);
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
this.UAC = UAC.clone(); this.UAC = UAC;
this.logger = logger; this.logger = logger;
this.verboseWriter = verboseWriter; this.verboseWriter = verboseWriter;
this.annotationEngine = engine; this.annotationEngine = engine;
N = 2 * this.samples.size(); this.N = N;
log10AlleleFrequencyPriorsSNPs = new double[N+1]; log10AlleleFrequencyPriorsSNPs = new double[N+1];
log10AlleleFrequencyPriorsIndels = new double[N+1]; log10AlleleFrequencyPriorsIndels = new double[N+1];
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity);
@ -219,7 +222,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
} }
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
} }
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) { private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -433,7 +436,7 @@ public class UnifiedGenotyperEngine {
if ( !BaseUtils.isRegularBase( refContext.getBase() ) ) if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
return null; return null;
if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) { if ( model.name().toUpperCase().contains("INDEL")) {
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
// regular pileup in this case // regular pileup in this case
@ -463,7 +466,7 @@ public class UnifiedGenotyperEngine {
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
} }
} else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) { } else if ( model.name().toUpperCase().contains("SNP") ) {
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup()); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
@ -594,21 +597,27 @@ public class UnifiedGenotyperEngine {
return null; return null;
if (vcInput.isSNP()) { if (vcInput.isSNP()) {
if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)) if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.SNP; return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP"))
return UAC.GLmodel;
else else
// ignore SNP's if user chose INDEL mode // ignore SNP's if user chose INDEL mode
return null; return null;
} }
else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)) else if ((vcInput.isIndel() || vcInput.isMixed())) {
return GenotypeLikelihoodsCalculationModel.Model.INDEL; if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
return UAC.GLmodel;
}
} }
else { else {
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed // todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP) if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.SNP; return GenotypeLikelihoodsCalculationModel.Model.SNP;
else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL"))
return GenotypeLikelihoodsCalculationModel.Model.INDEL; return UAC.GLmodel;
} }
} }
return null; return null;
@ -630,58 +639,77 @@ public class UnifiedGenotyperEngine {
} }
protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch( model ) { if (model.name().toUpperCase().contains("SNP"))
case SNP: return log10AlleleFrequencyPriorsSNPs;
return log10AlleleFrequencyPriorsSNPs; else if (model.name().toUpperCase().contains("INDEL"))
case INDEL: return log10AlleleFrequencyPriorsIndels;
return log10AlleleFrequencyPriorsIndels; else
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
} }
private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
GenotypePriors priors; GenotypePriors priors;
switch ( model ) { if( model.name().contains("SNP") )
case SNP: priors = new DiploidSNPGenotypePriors();
// use flat priors for GLs else if( model.name().contains("INDEL") )
priors = new DiploidSNPGenotypePriors(); priors = new DiploidIndelGenotypePriors();
break; else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
case INDEL:
// create flat priors for Indels, actual priors will depend on event length to be genotyped
priors = new DiploidIndelGenotypePriors();
break;
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
return priors; return priors;
} }
protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch( model ) { if( model.name().contains("SNP") )
case SNP: return genotypePriorsSNPs;
return genotypePriorsSNPs; if( model.name().contains("INDEL") )
case INDEL: return genotypePriorsIndels;
return genotypePriorsIndels; else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
}
} }
private static Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { private static Map<String,GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
Map<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<GenotypeLikelihoodsCalculationModel.Model, GenotypeLikelihoodsCalculationModel>();
glcm.put(GenotypeLikelihoodsCalculationModel.Model.SNP, new SNPGenotypeLikelihoodsCalculationModel(UAC, logger));
glcm.put(GenotypeLikelihoodsCalculationModel.Model.INDEL, new IndelGenotypeLikelihoodsCalculationModel(UAC, logger)); Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<String, GenotypeLikelihoodsCalculationModel>();
// GenotypeLikelihoodsCalculationModel.Model.
List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
for (int i = 0; i < glmClasses.size(); i++) {
Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
//System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName());
try {
Object args[] = new Object[]{UAC,logger};
Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
}
catch (Exception e) {
throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage());
}
}
return glcm; return glcm;
} }
private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) { private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) {
AlleleFrequencyCalculationModel afcm; List<Class<? extends AlleleFrequencyCalculationModel>> afClasses = new PluginManager<AlleleFrequencyCalculationModel>(AlleleFrequencyCalculationModel.class).getPlugins();
switch ( UAC.AFmodel ) {
case EXACT:
afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter);
break;
default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
}
return afcm; for (int i = 0; i < afClasses.size(); i++) {
Class<? extends AlleleFrequencyCalculationModel> afClass = afClasses.get(i);
String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase();
if (UAC.AFmodel.name().equalsIgnoreCase(key)) {
try {
Object args[] = new Object[]{UAC,N,logger,verboseWriter};
Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class);
return (AlleleFrequencyCalculationModel)c.newInstance(args);
}
catch (Exception e) {
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
}
}
}
throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel);
} }
public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) { public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {

View File

@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import java.math.BigDecimal; import java.math.BigDecimal;
@ -48,6 +49,7 @@ public class MathUtils {
} }
public static final double[] log10Cache; public static final double[] log10Cache;
public static final double[] log10FactorialCache;
private static final double[] jacobianLogTable; private static final double[] jacobianLogTable;
private static final double JACOBIAN_LOG_TABLE_STEP = 0.001; private static final double JACOBIAN_LOG_TABLE_STEP = 0.001;
private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / 0.001; private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / 0.001;
@ -58,11 +60,14 @@ public class MathUtils {
static { static {
log10Cache = new double[LOG10_CACHE_SIZE]; log10Cache = new double[LOG10_CACHE_SIZE];
log10FactorialCache = new double[LOG10_CACHE_SIZE];
jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE]; jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];
log10Cache[0] = Double.NEGATIVE_INFINITY; log10Cache[0] = Double.NEGATIVE_INFINITY;
for (int k = 1; k < LOG10_CACHE_SIZE; k++) for (int k = 1; k < LOG10_CACHE_SIZE; k++) {
log10Cache[k] = Math.log10(k); log10Cache[k] = Math.log10(k);
log10FactorialCache[k] = log10FactorialCache[k-1] + log10Cache[k];
}
for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP));
@ -233,6 +238,9 @@ public class MathUtils {
double sum = 0.0; double sum = 0.0;
double maxValue = Utils.findMaxEntry(log10p); double maxValue = Utils.findMaxEntry(log10p);
if(maxValue == Double.NEGATIVE_INFINITY)
return sum;
for (int i = start; i < finish; i++) { for (int i = start; i < finish; i++) {
sum += Math.pow(10.0, log10p[i] - maxValue); sum += Math.pow(10.0, log10p[i] - maxValue);
} }
@ -1046,6 +1054,28 @@ public class MathUtils {
} }
/**
* Given two log-probability vectors, compute log of vector product of them:
* in Matlab notation, return log10(10.*x'*10.^y)
* @param x vector 1
* @param y vector 2
* @return a double representing log (dotProd(10.^x,10.^y)
*/
public static double logDotProduct(double [] x, double[] y) {
if (x.length != y.length)
throw new ReviewedStingException("BUG: Vectors of different lengths");
double tmpVec[] = new double[x.length];
for (int k=0; k < tmpVec.length; k++ ) {
tmpVec[k] = x[k]+y[k];
}
return log10sumLog10(tmpVec);
}
public static Object getMedian(List<Comparable> list) { public static Object getMedian(List<Comparable> list) {
return orderStatisticSearch((int) Math.ceil(list.size() / 2), list); return orderStatisticSearch((int) Math.ceil(list.size() / 2), list);
} }
@ -1485,6 +1515,24 @@ public class MathUtils {
return result; return result;
} }
/** Same routine, unboxed types for efficiency
*
* @param x
* @param y
* @return Vector of same length as x and y so that z[k] = x[k]+y[k]
*/
public static double[] vectorSum(double[]x, double[] y) {
if (x.length != y.length)
throw new ReviewedStingException("BUG: Lengths of x and y must be the same");
double[] result = new double[x.length];
for (int k=0; k <x.length; k++)
result[k] = x[k]+y[k];
return result;
}
public static <E extends Number> Double[] scalarTimesVector(E a, E[] v1) { public static <E extends Number> Double[] scalarTimesVector(E a, E[] v1) {
Double result[] = new Double[v1.length]; Double result[] = new Double[v1.length];

View File

@ -34,21 +34,22 @@ import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
public class GATKReportUnitTest extends BaseTest { public class GATKReportUnitTest extends BaseTest {
@Test(enabled = false) @Test
public void testParse() throws Exception { public void testParse() throws Exception {
String reportPath = validationDataLocation + "exampleGATKReportv1.tbl"; String reportPath = validationDataLocation + "exampleGATKReportv1.tbl";
GATKReport report = new GATKReport(reportPath); GATKReport report = new GATKReport(reportPath);
Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_0);
Assert.assertEquals(report.getTables().size(), 5);
GATKReportTable countVariants = report.getTable("CountVariants"); GATKReportTable countVariants = report.getTable("CountVariants");
//Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1); Object countVariantsPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.all");
Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all"); Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520");
Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000"); Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0");
Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872"); Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06);
GATKReportTable validationReport = report.getTable("ValidationReport"); GATKReportTable validationReport = report.getTable("ValidationReport");
//Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1); Object validationReportPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.novel");
Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known"); Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN);
Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN");
} }
@DataProvider(name = "rightAlignValues") @DataProvider(name = "rightAlignValues")
@ -117,15 +118,15 @@ public class GATKReportUnitTest extends BaseTest {
report1.addTable("TableName", "Description"); report1.addTable("TableName", "Description");
report1.getTable("TableName").addPrimaryKey("id", displayPK); report1.getTable("TableName").addPrimaryKey("id", displayPK);
report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
report1.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); report1.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
report1.getTable("TableName").set(1, "colA", "NotNum"); report1.getTable("TableName").set(1, "colA", "NotNum");
report1.getTable("TableName").set(1, "colB", (byte) 64); report1.getTable("TableName").set(1, "colB", (char) 64);
report2 = new GATKReport(); report2 = new GATKReport();
report2.addTable("TableName", "Description"); report2.addTable("TableName", "Description");
report2.getTable("TableName").addPrimaryKey("id", displayPK); report2.getTable("TableName").addPrimaryKey("id", displayPK);
report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
report2.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); report2.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
report2.getTable("TableName").set(2, "colA", "df3"); report2.getTable("TableName").set(2, "colA", "df3");
report2.getTable("TableName").set(2, "colB", 'A'); report2.getTable("TableName").set(2, "colB", 'A');
@ -133,7 +134,7 @@ public class GATKReportUnitTest extends BaseTest {
report3.addTable("TableName", "Description"); report3.addTable("TableName", "Description");
report3.getTable("TableName").addPrimaryKey("id", displayPK); report3.getTable("TableName").addPrimaryKey("id", displayPK);
report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s");
report3.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); report3.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c");
report3.getTable("TableName").set(3, "colA", "df5f"); report3.getTable("TableName").set(3, "colA", "df5f");
report3.getTable("TableName").set(3, "colB", 'c'); report3.getTable("TableName").set(3, "colB", 'c');
@ -146,13 +147,13 @@ public class GATKReportUnitTest extends BaseTest {
table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d"); table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d");
table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E"); table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E");
table.addColumn("TrueFalse", false, true, "%B"); table.addColumn("TrueFalse", false, true, "%B");
table.set("12df", "SomeInt", 34); table.set("12df", "SomeInt", Byte.MAX_VALUE);
table.set("12df", "SomeFloat", 34.0); table.set("12df", "SomeFloat", 34.0);
table.set("12df", "TrueFalse", true); table.set("12df", "TrueFalse", true);
table.set("5f", "SomeInt", -1); table.set("5f", "SomeInt", Short.MAX_VALUE);
table.set("5f", "SomeFloat", 0.000003); table.set("5f", "SomeFloat", Double.MAX_VALUE);
table.set("5f", "TrueFalse", false); table.set("5f", "TrueFalse", false);
table.set("RZ", "SomeInt", 904948230958203958L); table.set("RZ", "SomeInt", Long.MAX_VALUE);
table.set("RZ", "SomeFloat", 535646345.657453464576); table.set("RZ", "SomeFloat", 535646345.657453464576);
table.set("RZ", "TrueFalse", true); table.set("RZ", "TrueFalse", true);

View File

@ -284,6 +284,18 @@ public class MathUtilsUnitTest extends BaseTest {
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211})); Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
} }
@Test
public void testDotProduct() {
Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0,-3.0,2.0}, new Double[]{6.0,7.0,8.0}),-35.0,1e-3);
Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0}, new Double[]{6.0}),-30.0,1e-3);
}
@Test
public void testLogDotProduct() {
Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0,-3.0,2.0}, new double[]{6.0,7.0,8.0}),10.0,1e-3);
Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0}, new double[]{6.0}),1.0,1e-3);
}
/** /**
* Private function used by testNormalizeFromLog10() * Private function used by testNormalizeFromLog10()
*/ */

View File

@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging {
println(" value (min,target,max) table key metric") println(" value (min,target,max) table key metric")
for (validation <- evalSpec.validations) { for (validation <- evalSpec.validations) {
val table = report.getTable(validation.table) val table = report.getTable(validation.table)
val key = table.getPrimaryKey(validation.key) val key = table.getPrimaryKeyByData(validation.key)
val value = String.valueOf(table.get(key, validation.metric)) val value = String.valueOf(table.get(key, validation.metric))
val inRange = if (value == null) false else validation.inRange(value) val inRange = if (value == null) false else validation.inRange(value)
val flag = if (!inRange) "*" else " " val flag = if (!inRange) "*" else " "