diff --git a/build.xml b/build.xml index d3e25d424..8e9de2272 100644 --- a/build.xml +++ b/build.xml @@ -955,8 +955,8 @@ - - + + diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java index 8fbfa96e9..f2291e5ec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java @@ -177,11 +177,8 @@ public class GATKReport { */ public void print(PrintStream out) { out.println(GATKREPORT_HEADER_PREFIX + getVersion().toString() + SEPARATOR + getTables().size()); - for (GATKReportTable table : tables.values()) { - if (table.getNumRows() > 0) { - table.write(out); - } - } + for (GATKReportTable table : tables.values()) + table.write(out); } public Collection getTables() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java index d9bae19c7..6451c5836 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportDataType.java @@ -48,9 +48,9 @@ public enum GATKReportDataType { Boolean("%[Bb]"), /** - * Used for byte and char value. Will display as a char so use printable values! + * Used for char values. Will display as a char so use printable values! */ - Byte("%[Cc]"), + Character("%[Cc]"), /** * Used for float and double values. Will output a decimal with format %.8f unless otherwise specified. @@ -58,7 +58,7 @@ public enum GATKReportDataType { Decimal("%.*[EeFf]"), /** - * Used for int, and long values. Will display the full number by default. + * Used for int, byte, short, and long values. Will display the full number by default. */ Integer("%[Dd]"), @@ -97,17 +97,26 @@ public enum GATKReportDataType { GATKReportDataType value; if (object instanceof Boolean) { value = GATKReportDataType.Boolean; - } else if (object instanceof Byte || object instanceof Character) { - value = GATKReportDataType.Byte; - } else if (object instanceof Float || object instanceof Double) { + + } else if (object instanceof Character) { + value = GATKReportDataType.Character; + + } else if (object instanceof Float || + object instanceof Double) { value = GATKReportDataType.Decimal; - } else if (object instanceof Integer || object instanceof Long) { + + } else if (object instanceof Integer || + object instanceof Long || + object instanceof Short || + object instanceof Byte ) { value = GATKReportDataType.Integer; + } else if (object instanceof String) { value = GATKReportDataType.String; + } else { value = GATKReportDataType.Unknown; - //throw new ReviewedStingException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation."); + //throw new UserException("GATKReport could not convert the data object into a GATKReportDataType. Acceptable data objects are found in the documentation."); } return value; } @@ -140,8 +149,8 @@ public enum GATKReportDataType { return 0.0D; case Boolean: return false; - case Byte: - return (byte) 0; + case Character: + return '0'; case Integer: return 0L; case String: @@ -166,16 +175,7 @@ public enum GATKReportDataType { case Boolean: case Integer: return a.toString().equals(b.toString()); - case Byte: - // A mess that checks if the bytes and characters contain the same value - if ((a instanceof Character && b instanceof Character) || - (a instanceof Byte && b instanceof Byte)) - return a.toString().equals(b.toString()); - else if (a instanceof Character && b instanceof Byte) { - return ((Character) a).charValue() == ((Byte) b).byteValue(); - } else if (a instanceof Byte && b instanceof Character) { - return ((Byte) a).byteValue() == ((Character) b).charValue(); - } + case Character: case String: default: return a.equals(b); @@ -201,8 +201,8 @@ public enum GATKReportDataType { return Long.parseLong(str); case String: return str; - case Byte: - return (byte) str.toCharArray()[0]; + case Character: + return str.toCharArray()[0]; default: return str; } @@ -225,7 +225,7 @@ public enum GATKReportDataType { return "%d"; case String: return "%s"; - case Byte: + case Character: return "%c"; case Null: default: diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index e0e3ad1fc..1fe67154e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -254,7 +254,7 @@ public class GATKReportTable { * @param dottedColumnValues Period concatenated values. * @return The first primary key matching the column values or throws an exception. */ - public Object getPrimaryKey(String dottedColumnValues) { + public Object getPrimaryKeyByData(String dottedColumnValues) { Object key = findPrimaryKey(dottedColumnValues); if (key == null) throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues); @@ -411,9 +411,8 @@ public class GATKReportTable { if (value == null) value = "null"; - // This code is bs. Why am do I have to conform to bad code - // Below is some ode to convert a string into its appropriate type. - // This is just Roger ranting + // This code below is bs. Why am do I have to conform to bad code + // Below is some code to convert a string into its appropriate type. // If we got a string but the column is not a String type Object newValue = null; @@ -431,7 +430,7 @@ public class GATKReportTable { } catch (Exception e) { } } - if (column.getDataType().equals(GATKReportDataType.Byte) && ((String) value).length() == 1) { + if (column.getDataType().equals(GATKReportDataType.Character) && ((String) value).length() == 1) { newValue = ((String) value).charAt(0); } @@ -816,7 +815,7 @@ public class GATKReportTable { out.println(); } - out.println(); + out.println(); } public int getNumRows() { @@ -877,8 +876,6 @@ public class GATKReportTable { this.set(rowKey, columnKey, toAdd.get(rowKey)); //System.out.printf("Putting row with PK: %s \n", rowKey); } else { - - // TODO we should be able to handle combining data by adding, averaging, etc. this.set(rowKey, columnKey, toAdd.get(rowKey)); System.out.printf("OVERWRITING Row with PK: %s \n", rowKey); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index ce00240b8..897e1645d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -94,8 +94,11 @@ public class RecalibrationReport { BitSet key = entry.getKey(); RecalDatum otherDatum = entry.getValue(); RecalDatum thisDatum = thisTable.get(key); - thisDatum.increment(otherDatum); // add the two datum objects into 'this' - thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it + if (thisDatum == null) + thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it! + else + thisDatum.increment(otherDatum); // add the two datum objects into 'this' + thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index e1ce2ee18..23d7c0ad6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -41,7 +41,8 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { public enum Model { /** The default model with the best performance in all cases */ - EXACT + EXACT, + POOL } protected int N; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 891159512..4bda3282e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -152,7 +152,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { @Override public boolean equals(Object obj) { - return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false; + return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts)obj).counts); } @Override @@ -202,7 +202,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } public boolean equals(Object obj) { - return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false; + return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index fb2428258..7527e17b6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -47,9 +47,17 @@ import java.util.Map; */ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { +/* public enum Model { + SNP, + INDEL, + BOTH + } + */ public enum Model { SNP, INDEL, + POOLSNP, + POOLINDEL, BOTH } @@ -60,7 +68,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { GENOTYPE_GIVEN_ALLELES } - protected UnifiedArgumentCollection UAC; + protected final UnifiedArgumentCollection UAC; protected Logger logger; /** @@ -70,7 +78,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { */ protected GenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { if ( logger == null || UAC == null ) throw new ReviewedStingException("Bad arguments"); - this.UAC = UAC.clone(); + this.UAC = UAC; this.logger = logger; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 7b8e5c897..58b8af493 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -38,7 +38,7 @@ public class UnifiedArgumentCollection { * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus. */ @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ -- EXACT is the default option, while GRID_SEARCH is also available.", required = false) - public AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; + protected AlleleFrequencyCalculationModel.Model AFmodel = AlleleFrequencyCalculationModel.Model.EXACT; /** * The expected heterozygosity value used to compute prior likelihoods for any locus. The default priors are: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index a04aef77b..bf482f5d7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -222,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = getHeaderInfo(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 4f1c64001..39745507c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -36,14 +36,17 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; import java.io.PrintStream; +import java.lang.reflect.Constructor; import java.util.*; public class UnifiedGenotyperEngine { @@ -71,7 +74,7 @@ public class UnifiedGenotyperEngine { private final VariantAnnotatorEngine annotationEngine; // the model used for calculating genotypes - private ThreadLocal> glcm = new ThreadLocal>(); + private ThreadLocal> glcm = new ThreadLocal>(); // the model used for calculating p(non-ref) private ThreadLocal afcm = new ThreadLocal(); @@ -112,22 +115,22 @@ public class UnifiedGenotyperEngine { // --------------------------------------------------------------------------------------------------------- @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { - this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader())); + this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size())); } - @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) - public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set samples) { + @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"}) + public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set samples, int N) { this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; genomeLocParser = toolkit.getGenomeLocParser(); this.samples = new TreeSet(samples); // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ - this.UAC = UAC.clone(); + this.UAC = UAC; this.logger = logger; this.verboseWriter = verboseWriter; this.annotationEngine = engine; - N = 2 * this.samples.size(); + this.N = N; log10AlleleFrequencyPriorsSNPs = new double[N+1]; log10AlleleFrequencyPriorsIndels = new double[N+1]; computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); @@ -219,7 +222,7 @@ public class UnifiedGenotyperEngine { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); + return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { @@ -433,7 +436,7 @@ public class UnifiedGenotyperEngine { if ( !BaseUtils.isRegularBase( refContext.getBase() ) ) return null; - if ( model == GenotypeLikelihoodsCalculationModel.Model.INDEL ) { + if ( model.name().toUpperCase().contains("INDEL")) { if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { // regular pileup in this case @@ -463,7 +466,7 @@ public class UnifiedGenotyperEngine { // stratify the AlignmentContext and cut by sample stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); } - } else if ( model == GenotypeLikelihoodsCalculationModel.Model.SNP ) { + } else if ( model.name().toUpperCase().contains("SNP") ) { // stratify the AlignmentContext and cut by sample stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup()); @@ -594,21 +597,27 @@ public class UnifiedGenotyperEngine { return null; if (vcInput.isSNP()) { - if (( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP)) + if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) return GenotypeLikelihoodsCalculationModel.Model.SNP; + else if ( UAC.GLmodel.name().toUpperCase().contains("SNP")) + return UAC.GLmodel; else // ignore SNP's if user chose INDEL mode return null; } - else if ((vcInput.isIndel() || vcInput.isMixed()) && (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL)) - return GenotypeLikelihoodsCalculationModel.Model.INDEL; + else if ((vcInput.isIndel() || vcInput.isMixed())) { + if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) + return GenotypeLikelihoodsCalculationModel.Model.INDEL; + else if (UAC.GLmodel.name().toUpperCase().contains("INDEL")) + return UAC.GLmodel; + } } else { // todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed - if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP) + if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) return GenotypeLikelihoodsCalculationModel.Model.SNP; - else if (UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) - return GenotypeLikelihoodsCalculationModel.Model.INDEL; + else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL")) + return UAC.GLmodel; } } return null; @@ -630,58 +639,77 @@ public class UnifiedGenotyperEngine { } protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { - switch( model ) { - case SNP: - return log10AlleleFrequencyPriorsSNPs; - case INDEL: - return log10AlleleFrequencyPriorsIndels; - default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } + if (model.name().toUpperCase().contains("SNP")) + return log10AlleleFrequencyPriorsSNPs; + else if (model.name().toUpperCase().contains("INDEL")) + return log10AlleleFrequencyPriorsIndels; + else + throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); + } private static GenotypePriors createGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { GenotypePriors priors; - switch ( model ) { - case SNP: - // use flat priors for GLs - priors = new DiploidSNPGenotypePriors(); - break; - case INDEL: - // create flat priors for Indels, actual priors will depend on event length to be genotyped - priors = new DiploidIndelGenotypePriors(); - break; - default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } + if( model.name().contains("SNP") ) + priors = new DiploidSNPGenotypePriors(); + else if( model.name().contains("INDEL") ) + priors = new DiploidIndelGenotypePriors(); + else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); + return priors; } protected GenotypePriors getGenotypePriors( final GenotypeLikelihoodsCalculationModel.Model model ) { - switch( model ) { - case SNP: - return genotypePriorsSNPs; - case INDEL: - return genotypePriorsIndels; - default: throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); - } + if( model.name().contains("SNP") ) + return genotypePriorsSNPs; + if( model.name().contains("INDEL") ) + return genotypePriorsIndels; + else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); } - private static Map getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { - Map glcm = new HashMap(); - glcm.put(GenotypeLikelihoodsCalculationModel.Model.SNP, new SNPGenotypeLikelihoodsCalculationModel(UAC, logger)); - glcm.put(GenotypeLikelihoodsCalculationModel.Model.INDEL, new IndelGenotypeLikelihoodsCalculationModel(UAC, logger)); + private static Map getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { + + + Map glcm = new HashMap(); + // GenotypeLikelihoodsCalculationModel.Model. + List> glmClasses = new PluginManager(GenotypeLikelihoodsCalculationModel.class).getPlugins(); + + for (int i = 0; i < glmClasses.size(); i++) { + Class glmClass = glmClasses.get(i); + String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase(); + //System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName()); + try { + Object args[] = new Object[]{UAC,logger}; + Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class); + glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args)); + } + catch (Exception e) { + throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage()); + } + } + return glcm; } private static AlleleFrequencyCalculationModel getAlleleFrequencyCalculationObject(int N, Logger logger, PrintStream verboseWriter, UnifiedArgumentCollection UAC) { - AlleleFrequencyCalculationModel afcm; - switch ( UAC.AFmodel ) { - case EXACT: - afcm = new ExactAFCalculationModel(UAC, N, logger, verboseWriter); - break; - default: throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); - } + List> afClasses = new PluginManager(AlleleFrequencyCalculationModel.class).getPlugins(); - return afcm; + for (int i = 0; i < afClasses.size(); i++) { + Class afClass = afClasses.get(i); + String key = afClass.getSimpleName().replace("AFCalculationModel","").toUpperCase(); + if (UAC.AFmodel.name().equalsIgnoreCase(key)) { + try { + Object args[] = new Object[]{UAC,N,logger,verboseWriter}; + Constructor c = afClass.getDeclaredConstructor(UnifiedArgumentCollection.class, int.class, Logger.class, PrintStream.class); + + return (AlleleFrequencyCalculationModel)c.newInstance(args); + } + catch (Exception e) { + throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); + } + } + } + throw new IllegalArgumentException("Unexpected AlleleFrequencyCalculationModel " + UAC.AFmodel); } public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding allelesBinding) { diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 7c882ac6d..c4b0165ca 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -29,6 +29,7 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.math.BigDecimal; @@ -48,6 +49,7 @@ public class MathUtils { } public static final double[] log10Cache; + public static final double[] log10FactorialCache; private static final double[] jacobianLogTable; private static final double JACOBIAN_LOG_TABLE_STEP = 0.001; private static final double JACOBIAN_LOG_TABLE_INV_STEP = 1.0 / 0.001; @@ -58,11 +60,14 @@ public class MathUtils { static { log10Cache = new double[LOG10_CACHE_SIZE]; + log10FactorialCache = new double[LOG10_CACHE_SIZE]; jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE]; log10Cache[0] = Double.NEGATIVE_INFINITY; - for (int k = 1; k < LOG10_CACHE_SIZE; k++) + for (int k = 1; k < LOG10_CACHE_SIZE; k++) { log10Cache[k] = Math.log10(k); + log10FactorialCache[k] = log10FactorialCache[k-1] + log10Cache[k]; + } for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); @@ -233,6 +238,9 @@ public class MathUtils { double sum = 0.0; double maxValue = Utils.findMaxEntry(log10p); + if(maxValue == Double.NEGATIVE_INFINITY) + return sum; + for (int i = start; i < finish; i++) { sum += Math.pow(10.0, log10p[i] - maxValue); } @@ -1046,6 +1054,28 @@ public class MathUtils { } + /** + * Given two log-probability vectors, compute log of vector product of them: + * in Matlab notation, return log10(10.*x'*10.^y) + * @param x vector 1 + * @param y vector 2 + * @return a double representing log (dotProd(10.^x,10.^y) + */ + public static double logDotProduct(double [] x, double[] y) { + if (x.length != y.length) + throw new ReviewedStingException("BUG: Vectors of different lengths"); + + double tmpVec[] = new double[x.length]; + + for (int k=0; k < tmpVec.length; k++ ) { + tmpVec[k] = x[k]+y[k]; + } + + return log10sumLog10(tmpVec); + + + + } public static Object getMedian(List list) { return orderStatisticSearch((int) Math.ceil(list.size() / 2), list); } @@ -1485,6 +1515,24 @@ public class MathUtils { return result; } + /** Same routine, unboxed types for efficiency + * + * @param x + * @param y + * @return Vector of same length as x and y so that z[k] = x[k]+y[k] + */ + public static double[] vectorSum(double[]x, double[] y) { + if (x.length != y.length) + throw new ReviewedStingException("BUG: Lengths of x and y must be the same"); + + double[] result = new double[x.length]; + for (int k=0; k Double[] scalarTimesVector(E a, E[] v1) { Double result[] = new Double[v1.length]; diff --git a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java index 90c92189e..ec0db12d3 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/report/GATKReportUnitTest.java @@ -34,21 +34,22 @@ import java.io.IOException; import java.io.PrintStream; public class GATKReportUnitTest extends BaseTest { - @Test(enabled = false) + @Test public void testParse() throws Exception { String reportPath = validationDataLocation + "exampleGATKReportv1.tbl"; GATKReport report = new GATKReport(reportPath); + Assert.assertEquals(report.getVersion(), GATKReportVersion.V1_0); + Assert.assertEquals(report.getTables().size(), 5); GATKReportTable countVariants = report.getTable("CountVariants"); - //Assert.assertEquals(countVariants.getVersion(), GATKReportVersion.V0_1); - Object countVariantsPK = countVariants.getPrimaryKey("none.eval.none.all"); - Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "100000"); - Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "99872"); + Object countVariantsPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.all"); + Assert.assertEquals(countVariants.get(countVariantsPK, "nProcessedLoci"), "63025520"); + Assert.assertEquals(countVariants.get(countVariantsPK, "nNoCalls"), "0"); + Assert.assertEquals(countVariants.get(countVariantsPK, "heterozygosity"), 4.73e-06); GATKReportTable validationReport = report.getTable("ValidationReport"); - //Assert.assertEquals(validationReport.getVersion(), GATKReportVersion.V0_1); - Object validationReportPK = countVariants.getPrimaryKey("none.eval.none.known"); - Assert.assertEquals(validationReport.get(validationReportPK, "sensitivity"), "NaN"); + Object validationReportPK = countVariants.getPrimaryKeyByData("dbsnp.eval.none.novel"); + Assert.assertEquals(validationReport.get(validationReportPK, "PPV"), Double.NaN); } @DataProvider(name = "rightAlignValues") @@ -117,15 +118,15 @@ public class GATKReportUnitTest extends BaseTest { report1.addTable("TableName", "Description"); report1.getTable("TableName").addPrimaryKey("id", displayPK); report1.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report1.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report1.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); report1.getTable("TableName").set(1, "colA", "NotNum"); - report1.getTable("TableName").set(1, "colB", (byte) 64); + report1.getTable("TableName").set(1, "colB", (char) 64); report2 = new GATKReport(); report2.addTable("TableName", "Description"); report2.getTable("TableName").addPrimaryKey("id", displayPK); report2.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report2.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report2.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); report2.getTable("TableName").set(2, "colA", "df3"); report2.getTable("TableName").set(2, "colB", 'A'); @@ -133,7 +134,7 @@ public class GATKReportUnitTest extends BaseTest { report3.addTable("TableName", "Description"); report3.getTable("TableName").addPrimaryKey("id", displayPK); report3.getTable("TableName").addColumn("colA", GATKReportDataType.String.getDefaultValue(), "%s"); - report3.getTable("TableName").addColumn("colB", GATKReportDataType.Byte.getDefaultValue(), "%c"); + report3.getTable("TableName").addColumn("colB", GATKReportDataType.Character.getDefaultValue(), "%c"); report3.getTable("TableName").set(3, "colA", "df5f"); report3.getTable("TableName").set(3, "colB", 'c'); @@ -146,13 +147,13 @@ public class GATKReportUnitTest extends BaseTest { table.addColumn("SomeInt", GATKReportDataType.Integer.getDefaultValue(), true, "%d"); table.addColumn("SomeFloat", GATKReportDataType.Decimal.getDefaultValue(), true, "%.16E"); table.addColumn("TrueFalse", false, true, "%B"); - table.set("12df", "SomeInt", 34); + table.set("12df", "SomeInt", Byte.MAX_VALUE); table.set("12df", "SomeFloat", 34.0); table.set("12df", "TrueFalse", true); - table.set("5f", "SomeInt", -1); - table.set("5f", "SomeFloat", 0.000003); + table.set("5f", "SomeInt", Short.MAX_VALUE); + table.set("5f", "SomeFloat", Double.MAX_VALUE); table.set("5f", "TrueFalse", false); - table.set("RZ", "SomeInt", 904948230958203958L); + table.set("RZ", "SomeInt", Long.MAX_VALUE); table.set("RZ", "SomeFloat", 535646345.657453464576); table.set("RZ", "TrueFalse", true); diff --git a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java index 482f4da80..adc7927a7 100755 --- a/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MathUtilsUnitTest.java @@ -284,6 +284,18 @@ public class MathUtilsUnitTest extends BaseTest { Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211})); } + @Test + public void testDotProduct() { + Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0,-3.0,2.0}, new Double[]{6.0,7.0,8.0}),-35.0,1e-3); + Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0}, new Double[]{6.0}),-30.0,1e-3); + } + + @Test + public void testLogDotProduct() { + Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0,-3.0,2.0}, new double[]{6.0,7.0,8.0}),10.0,1e-3); + Assert.assertEquals(MathUtils.logDotProduct(new double[]{-5.0}, new double[]{6.0}),1.0,1e-3); + } + /** * Private function used by testNormalizeFromLog10() */ diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index f0feb207b..22f4f6225 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -136,7 +136,7 @@ object PipelineTest extends BaseTest with Logging { println(" value (min,target,max) table key metric") for (validation <- evalSpec.validations) { val table = report.getTable(validation.table) - val key = table.getPrimaryKey(validation.key) + val key = table.getPrimaryKeyByData(validation.key) val value = String.valueOf(table.get(key, validation.metric)) val inRange = if (value == null) false else validation.inRange(value) val flag = if (!inRange) "*" else " "