From 4a57add6d0e7591bdbcd059d0195061d3a6a8152 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 1 Feb 2012 19:35:33 -0500 Subject: [PATCH] First implementation of DiagnoseTargets * calculates and interprets the coverage of a given interval track * allows to expand intervals by specified number of bases * classifies targets as CALLABLE, LOW_COVERAGE, EXCESSIVE_COVERAGE and POOR_QUALITY. * outputs text file for now (testing purposes only), soon to be VCF. * filters are overly aggressive for now. --- .../diagnostics/targets/CallableStatus.java | 22 ++ .../diagnostics/targets/DiagnoseTargets.java | 172 ++++++++++++ .../targets/IntervalStatisticLocus.java | 34 +++ .../targets/IntervalStatistics.java | 122 +++++++++ .../broadinstitute/sting/utils/MathUtils.java | 253 ++++++++---------- 5 files changed, 457 insertions(+), 146 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/CallableStatus.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatisticLocus.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatistics.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/CallableStatus.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/CallableStatus.java new file mode 100644 index 000000000..60f20074a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/CallableStatus.java @@ -0,0 +1,22 @@ +package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; + +/** + * Short one line description of the walker. + * + * @author Mauricio Carneiro + * @since 2/1/12 + */ +public enum CallableStatus { + /** the reference base was an N, which is not considered callable the GATK */ + REF_N, + /** the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE */ + CALLABLE, + /** absolutely no reads were seen at this locus, regardless of the filtering parameters */ + NO_COVERAGE, + /** there were less than min. depth bases at the locus, after applying filters */ + LOW_COVERAGE, + /** more than -maxDepth read at the locus, indicating some sort of mapping problem */ + EXCESSIVE_COVERAGE, + /** more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads */ + POOR_QUALITY +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java new file mode 100644 index 000000000..979fb665f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargets.java @@ -0,0 +1,172 @@ +package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; + +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Input; +import org.broadinstitute.sting.commandline.IntervalBinding; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.By; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocComparator; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.exceptions.UserException; + +import java.io.PrintStream; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.TreeSet; + +/** + * Short one line description of the walker. + * + *

+ * [Long description of the walker] + *

+ * + * + *

Input

+ *

+ * [Description of the Input] + *

+ * + *

Output

+ *

+ * [Description of the Output] + *

+ * + *

Examples

+ *
+ *    java
+ *      -jar GenomeAnalysisTK.jar
+ *      -T [walker name]
+ *  
+ * + * @author Mauricio Carneiro + * @since 2/1/12 + */ +@By(value = DataSource.READS) +public class DiagnoseTargets extends LocusWalker { + @Input(fullName = "interval_track", shortName = "int", doc = "", required = true) + private IntervalBinding intervalTrack = null; + + @Output + private PrintStream out = System.out; + + @Argument(fullName = "expand_interval", shortName = "exp", doc = "", required = false) + private int expandInterval = 50; + + @Argument(fullName = "minimum_base_quality", shortName = "mbq", doc = "", required = false) + private int minimumBaseQuality = 20; + + @Argument(fullName = "minimum_mapping_quality", shortName = "mmq", doc = "", required = false) + private int minimumMappingQuality = 20; + + @Argument(fullName = "minimum_coverage", shortName = "mincov", doc = "", required = false) + private int minimumCoverage = 5; + + @Argument(fullName = "maximum_coverage", shortName = "maxcov", doc = "", required = false) + private int maximumCoverage = 700; + + private TreeSet intervalList = null; // The list of intervals of interest (plus expanded intervals if user wants them) + private HashMap intervalMap = null; // interval => statistics + private Iterator intervalListIterator; // An iterator to go over all the intervals provided as we traverse the genome + private GenomeLoc currentInterval = null; // The "current" interval loaded and being filled with statistics + private IntervalStatistics currentIntervalStatistics = null; // The "current" interval loaded and being filled with statistics + + private GenomeLocParser parser; // just an object to allow us to create genome locs (for the expanded intervals) + + @Override + public void initialize() { + super.initialize(); + + if (intervalTrack == null) + throw new UserException("This tool currently only works if you provide an interval track"); + + parser = new GenomeLocParser(getToolkit().getMasterSequenceDictionary()); // Important to initialize the parser before creating the intervals below + + List originalList = intervalTrack.getIntervals(getToolkit()); // The original list of targets provided by the user that will be expanded or not depending on the options provided + intervalList = new TreeSet(new GenomeLocComparator()); + intervalMap = new HashMap(originalList.size() * 2); + for (GenomeLoc interval : originalList) + addAndExpandIntervalToLists(interval); + + intervalListIterator = intervalList.iterator(); + } + + @Override + public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + GenomeLoc refLocus = ref.getLocus(); + while (currentInterval == null || currentInterval.isBefore(refLocus)) { + if (!intervalListIterator.hasNext()) + return 0L; + + currentInterval = intervalListIterator.next(); + currentIntervalStatistics = intervalMap.get(currentInterval); + } + + if (currentInterval.isPast(refLocus)) + return 0L; + + byte[] mappingQualities = context.getBasePileup().getMappingQuals(); + byte[] baseQualities = context.getBasePileup().getQuals(); + int coverage = context.getBasePileup().getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality).depthOfCoverage(); + int rawCoverage = context.size(); + + IntervalStatisticLocus locusData = new IntervalStatisticLocus(mappingQualities, baseQualities, coverage, rawCoverage); + currentIntervalStatistics.addLocus(refLocus, locusData); + + return 1L; + } + + @Override + public Long reduceInit() { + return 0L; + } + + @Override + public Long reduce(Long value, Long sum) { + return sum + value; + } + + @Override + public void onTraversalDone(Long result) { + super.onTraversalDone(result); + out.println("Interval\tCallStatus\tCOV\tAVG"); + for (GenomeLoc interval : intervalList) { + IntervalStatistics stats = intervalMap.get(interval); + out.println(String.format("%s\t%s\t%d\t%f", interval, stats.callableStatus(), stats.totalCoverage(), stats.averageCoverage())); + } + } + + private GenomeLoc createIntervalBefore(GenomeLoc interval) { + int start = Math.max(interval.getStart() - expandInterval, 0); + int stop = Math.max(interval.getStart() - 1, 0); + return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), start, stop); + } + + private GenomeLoc createIntervalAfter(GenomeLoc interval) { + int contigLimit = getToolkit().getSAMFileHeader().getSequenceDictionary().getSequence(interval.getContigIndex()).getSequenceLength(); + int start = Math.min(interval.getStop() + 1, contigLimit); + int stop = Math.min(interval.getStop() + expandInterval, contigLimit); + return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), start, stop); + } + + private void addAndExpandIntervalToLists(GenomeLoc interval) { + if (expandInterval > 0) { + GenomeLoc before = createIntervalBefore(interval); + GenomeLoc after = createIntervalAfter(interval); + intervalList.add(before); + intervalList.add(after); + intervalMap.put(before, new IntervalStatistics(before, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality)); + intervalMap.put(after, new IntervalStatistics(after, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality)); + } + intervalList.add(interval); + intervalMap.put(interval, new IntervalStatistics(interval, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality)); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatisticLocus.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatisticLocus.java new file mode 100644 index 000000000..5620c3902 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatisticLocus.java @@ -0,0 +1,34 @@ +package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; + +/** + * The definition of a locus for the DiagnoseTargets walker statistics calculation + * + * @author Mauricio Carneiro + * @since 2/3/12 + */ +class IntervalStatisticLocus { + private final byte[] mappingQuality; + private final byte[] baseQuality; + private final int coverage; + private final int rawCoverage; + + public IntervalStatisticLocus(byte[] mappingQuality, byte[] baseQuality, int coverage, int rawCoverage) { + this.mappingQuality = mappingQuality; + this.baseQuality = baseQuality; + this.coverage = coverage; + this.rawCoverage = rawCoverage; + } + + public IntervalStatisticLocus() { + this(new byte[1], new byte[1], 0, 0); + } + + public int getCoverage() { + return coverage; + } + + public int getRawCoverage() { + return rawCoverage; + } + +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatistics.java new file mode 100644 index 000000000..8ee5f76fb --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/IntervalStatistics.java @@ -0,0 +1,122 @@ +package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; + +import java.util.ArrayList; +import java.util.HashMap; + +/** + * Short one line description of the walker. + * + * @author Mauricio Carneiro + * @since 2/1/12 + */ +class IntervalStatistics { + private final GenomeLoc interval; + private final ArrayList loci; + + private final int minimumCoverageThreshold; + private final int maximumCoverageThreshold; + private final int minimumMappingQuality; + private final int minimumBaseQuality; + + private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet) + + private IntervalStatistics(GenomeLoc interval, ArrayList loci, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) { + this.interval = interval; + this.loci = loci; + this.minimumCoverageThreshold = minimumCoverageThreshold; + this.maximumCoverageThreshold = maximumCoverageThreshold; + this.minimumMappingQuality = minimumMappingQuality; + this.minimumBaseQuality = minimumBaseQuality; + } + + public IntervalStatistics(GenomeLoc interval, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) { + this(interval, new ArrayList(interval.size()), minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality); + + // Initialize every loci (this way we don't have to worry about non-existent loci in the object + for (int i = 0; i < interval.size(); i++) + this.loci.add(i, new IntervalStatisticLocus()); + + } + + public long totalCoverage() { + if (preComputedTotalCoverage < 0) + calculateTotalCoverage(); + return preComputedTotalCoverage; + } + + public double averageCoverage() { + if (preComputedTotalCoverage < 0) + calculateTotalCoverage(); + return (double) preComputedTotalCoverage / loci.size(); + } + + /** + * Calculates the callable status of the entire interval + * + * @return the callable status of the entire interval + */ + public CallableStatus callableStatus() { + long max = -1; + CallableStatus maxCallableStatus = null; + HashMap statusCounts = new HashMap(CallableStatus.values().length); + + // initialize the statusCounts with all callable states + for (CallableStatus key : CallableStatus.values()) + statusCounts.put(key, 0); + + // calculate the callable status for each locus + for (int i = 0; i < loci.size(); i++) { + CallableStatus status = callableStatus(i); + int count = statusCounts.get(status) + 1; + statusCounts.put(status, count); + + if (count > max) { + max = count; + maxCallableStatus = status; + } + } + + return maxCallableStatus; + } + + public void addLocus(GenomeLoc locus, IntervalStatisticLocus locusData) { + if (!interval.containsP(locus)) + throw new ReviewedStingException(String.format("Locus %s is not part of the Interval", locus)); + + int locusIndex = locus.getStart() - interval.getStart(); + + loci.add(locusIndex, locusData); + } + + /** + * returns the callable status of this locus without taking the reference base into account. + * + * @param locusIndex location in the genome to inquire (only one locus) + * @return the callable status of a locus + */ + private CallableStatus callableStatus(int locusIndex) { + if (loci.get(locusIndex).getCoverage() > maximumCoverageThreshold) + return CallableStatus.EXCESSIVE_COVERAGE; + + if (loci.get(locusIndex).getCoverage() >= minimumCoverageThreshold) + return CallableStatus.CALLABLE; + + if (loci.get(locusIndex).getRawCoverage() >= minimumCoverageThreshold) + return CallableStatus.POOR_QUALITY; + + if (loci.get(locusIndex).getRawCoverage() > 0) + return CallableStatus.LOW_COVERAGE; + + return CallableStatus.NO_COVERAGE; + } + + private void calculateTotalCoverage() { + preComputedTotalCoverage = 0; + for (IntervalStatisticLocus locus : loci) + preComputedTotalCoverage += locus.getCoverage(); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 814cb2765..a4e9fc7ed 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -49,7 +49,6 @@ public class MathUtils { * high precision */ - /** * Private constructor. No instantiating this class! */ @@ -60,48 +59,48 @@ public class MathUtils { // under/overflow checking, so this shouldn't be used in the general case (but is fine // if one is already make those checks before calling in to the rounding). public static int fastRound(double d) { - return (d > 0) ? (int)(d + 0.5d) : (int)(d - 0.5d); + return (d > 0) ? (int) (d + 0.5d) : (int) (d - 0.5d); } public static double approximateLog10SumLog10(final double[] vals) { - return approximateLog10SumLog10(vals, vals.length); + return approximateLog10SumLog10(vals, vals.length); } public static double approximateLog10SumLog10(final double[] vals, final int endIndex) { - final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex); - double approxSum = vals[maxElementIndex]; - if ( approxSum == Double.NEGATIVE_INFINITY ) + final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex); + double approxSum = vals[maxElementIndex]; + if (approxSum == Double.NEGATIVE_INFINITY) return approxSum; - for ( int i = 0; i < endIndex; i++ ) { - if ( i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY ) - continue; + for (int i = 0; i < endIndex; i++) { + if (i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY) + continue; - final double diff = approxSum - vals[i]; - if ( diff < MathUtils.MAX_JACOBIAN_TOLERANCE ) { - // See notes from the 2-inout implementation below - final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding - approxSum += MathUtils.jacobianLogTable[ind]; - } - } + final double diff = approxSum - vals[i]; + if (diff < MathUtils.MAX_JACOBIAN_TOLERANCE) { + // See notes from the 2-inout implementation below + final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding + approxSum += MathUtils.jacobianLogTable[ind]; + } + } return approxSum; } public static double approximateLog10SumLog10(double small, double big) { // make sure small is really the smaller value - if ( small > big ) { + if (small > big) { final double t = big; big = small; small = t; } - if ( small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY ) + if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY) return big; - final double diff = big - small; - if ( diff >= MathUtils.MAX_JACOBIAN_TOLERANCE ) + final double diff = big - small; + if (diff >= MathUtils.MAX_JACOBIAN_TOLERANCE) return big; // OK, so |y-x| < tol: we use the following identity then: @@ -137,7 +136,7 @@ public class MathUtils { return size; } - + public static double average(Collection x) { return (double) sum(x) / x.size(); } @@ -145,7 +144,8 @@ public class MathUtils { public static double average(Collection numbers, boolean ignoreNan) { if (ignoreNan) { return sum(numbers, true) / nonNanSize(numbers); - } else { + } + else { return sum(numbers, false) / nonNanSize(numbers); } } @@ -176,7 +176,8 @@ public class MathUtils { public static double sum(double[] values) { double s = 0.0; - for (double v : values) s += v; + for (double v : values) + s += v; return s; } @@ -187,7 +188,6 @@ public class MathUtils { return total; } - /** * Calculates the log10 cumulative sum of an array with log10 probabilities * @@ -229,21 +229,23 @@ public class MathUtils { public static double sumDoubles(List values) { double s = 0.0; - for (double v : values) s += v; + for (double v : values) + s += v; return s; } public static int sumIntegers(List values) { int s = 0; - for (int v : values) s += v; + for (int v : values) + s += v; return s; } public static double sumLog10(double[] log10values) { return Math.pow(10.0, log10sumLog10(log10values)); -// double s = 0.0; -// for ( double v : log10values) s += Math.pow(10.0, v); -// return s; + // double s = 0.0; + // for ( double v : log10values) s += Math.pow(10.0, v); + // return s; } public static double log10sumLog10(double[] log10values) { @@ -456,7 +458,6 @@ public class MathUtils { return Math.sqrt(rms); } - /** * calculate the Root Mean Square of an array of integers * @@ -517,7 +518,6 @@ public class MathUtils { return result; } - /** * normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE). * @@ -554,7 +554,8 @@ public class MathUtils { sum += normalized[i]; for (int i = 0; i < array.length; i++) { double x = normalized[i] / sum; - if (takeLog10OfOutput) x = Math.log10(x); + if (takeLog10OfOutput) + x = Math.log10(x); normalized[i] = x; } @@ -576,7 +577,8 @@ public class MathUtils { sum += normalized[i]; for (int i = 0; i < array.size(); i++) { double x = normalized[i] / sum; - if (takeLog10OfOutput) x = Math.log10(x); + if (takeLog10OfOutput) + x = Math.log10(x); normalized[i] = x; } @@ -598,11 +600,12 @@ public class MathUtils { } public static int maxElementIndex(final double[] array) { - return maxElementIndex(array, array.length); + return maxElementIndex(array, array.length); } public static int maxElementIndex(final double[] array, final int endIndex) { - if (array == null) throw new IllegalArgumentException("Array cannot be null!"); + if (array == null) + throw new IllegalArgumentException("Array cannot be null!"); int maxI = -1; for (int i = 0; i < endIndex; i++) { @@ -614,11 +617,12 @@ public class MathUtils { } public static int maxElementIndex(final int[] array) { - return maxElementIndex(array, array.length); + return maxElementIndex(array, array.length); } public static int maxElementIndex(final int[] array, int endIndex) { - if (array == null) throw new IllegalArgumentException("Array cannot be null!"); + if (array == null) + throw new IllegalArgumentException("Array cannot be null!"); int maxI = -1; for (int i = 0; i < endIndex; i++) { @@ -646,7 +650,8 @@ public class MathUtils { } public static int minElementIndex(double[] array) { - if (array == null) throw new IllegalArgumentException("Array cannot be null!"); + if (array == null) + throw new IllegalArgumentException("Array cannot be null!"); int minI = -1; for (int i = 0; i < array.length; i++) { @@ -658,7 +663,8 @@ public class MathUtils { } public static int minElementIndex(byte[] array) { - if (array == null) throw new IllegalArgumentException("Array cannot be null!"); + if (array == null) + throw new IllegalArgumentException("Array cannot be null!"); int minI = -1; for (int i = 0; i < array.length; i++) { @@ -670,7 +676,8 @@ public class MathUtils { } public static int minElementIndex(int[] array) { - if (array == null) throw new IllegalArgumentException("Array cannot be null!"); + if (array == null) + throw new IllegalArgumentException("Array cannot be null!"); int minI = -1; for (int i = 0; i < array.length; i++) { @@ -682,20 +689,26 @@ public class MathUtils { } public static int arrayMaxInt(List array) { - if (array == null) throw new IllegalArgumentException("Array cannot be null!"); - if (array.size() == 0) throw new IllegalArgumentException("Array size cannot be 0!"); + if (array == null) + throw new IllegalArgumentException("Array cannot be null!"); + if (array.size() == 0) + throw new IllegalArgumentException("Array size cannot be 0!"); int m = array.get(0); - for (int e : array) m = Math.max(m, e); + for (int e : array) + m = Math.max(m, e); return m; } public static double arrayMaxDouble(List array) { - if (array == null) throw new IllegalArgumentException("Array cannot be null!"); - if (array.size() == 0) throw new IllegalArgumentException("Array size cannot be 0!"); + if (array == null) + throw new IllegalArgumentException("Array cannot be null!"); + if (array.size() == 0) + throw new IllegalArgumentException("Array size cannot be 0!"); double m = array.get(0); - for (double e : array) m = Math.max(m, e); + for (double e : array) + m = Math.max(m, e); return m; } @@ -816,7 +829,6 @@ public class MathUtils { return permutation; } - public static int[] permuteArray(int[] array, Integer[] permutation) { int[] output = new int[array.length]; for (int i = 0; i < output.length; i++) { @@ -857,7 +869,6 @@ public class MathUtils { return output; } - /** * Draw N random elements from list. */ @@ -923,7 +934,8 @@ public class MathUtils { public static int countOccurrences(T x, List l) { int count = 0; for (T y : l) { - if (x.equals(y)) count++; + if (x.equals(y)) + count++; } return count; @@ -1031,9 +1043,11 @@ public class MathUtils { for (Comparable y : list) { if (x.compareTo(y) > 0) { lessThanX.add(y); - } else if (x.compareTo(y) < 0) { + } + else if (x.compareTo(y) < 0) { greaterThanX.add(y); - } else + } + else equalToX.add(y); } @@ -1046,7 +1060,6 @@ public class MathUtils { } - public static Object getMedian(List list) { return orderStatisticSearch((int) Math.ceil(list.size() / 2), list); } @@ -1076,10 +1089,12 @@ public class MathUtils { if (quality < qk) { lessThanQReads.add(read); lessThanQOffsets.add(offset); - } else if (quality > qk) { + } + else if (quality > qk) { greaterThanQReads.add(read); greaterThanQOffsets.add(offset); - } else { + } + else { equalToQReads.add(reads.get(iter)); } } @@ -1100,7 +1115,7 @@ public class MathUtils { public static long sum(Collection x) { long sum = 0; for (int v : x) - sum += v; + sum += v; return sum; } @@ -1209,8 +1224,7 @@ public class MathUtils { log10Cache[k] = Math.log10(k); for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { - jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) - * JACOBIAN_LOG_TABLE_STEP)); + jacobianLogTable[k] = Math.log10(1.0 + Math.pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)); } } @@ -1257,7 +1271,8 @@ public class MathUtils { else if (diff >= 0) { int ind = (int) (diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5); return x + jacobianLogTable[ind]; - } else { + } + else { int ind = (int) (-diff * INV_JACOBIAN_LOG_TABLE_STEP + 0.5); return y + jacobianLogTable[ind]; } @@ -1298,71 +1313,7 @@ public class MathUtils { /** * Constants to simplify the log gamma function calculation. */ - private static final double - zero = 0.0, - one = 1.0, - half = .5, - a0 = 7.72156649015328655494e-02, - a1 = 3.22467033424113591611e-01, - a2 = 6.73523010531292681824e-02, - a3 = 2.05808084325167332806e-02, - a4 = 7.38555086081402883957e-03, - a5 = 2.89051383673415629091e-03, - a6 = 1.19270763183362067845e-03, - a7 = 5.10069792153511336608e-04, - a8 = 2.20862790713908385557e-04, - a9 = 1.08011567247583939954e-04, - a10 = 2.52144565451257326939e-05, - a11 = 4.48640949618915160150e-05, - tc = 1.46163214496836224576e+00, - tf = -1.21486290535849611461e-01, - tt = -3.63867699703950536541e-18, - t0 = 4.83836122723810047042e-01, - t1 = -1.47587722994593911752e-01, - t2 = 6.46249402391333854778e-02, - t3 = -3.27885410759859649565e-02, - t4 = 1.79706750811820387126e-02, - t5 = -1.03142241298341437450e-02, - t6 = 6.10053870246291332635e-03, - t7 = -3.68452016781138256760e-03, - t8 = 2.25964780900612472250e-03, - t9 = -1.40346469989232843813e-03, - t10 = 8.81081882437654011382e-04, - t11 = -5.38595305356740546715e-04, - t12 = 3.15632070903625950361e-04, - t13 = -3.12754168375120860518e-04, - t14 = 3.35529192635519073543e-04, - u0 = -7.72156649015328655494e-02, - u1 = 6.32827064025093366517e-01, - u2 = 1.45492250137234768737e+00, - u3 = 9.77717527963372745603e-01, - u4 = 2.28963728064692451092e-01, - u5 = 1.33810918536787660377e-02, - v1 = 2.45597793713041134822e+00, - v2 = 2.12848976379893395361e+00, - v3 = 7.69285150456672783825e-01, - v4 = 1.04222645593369134254e-01, - v5 = 3.21709242282423911810e-03, - s0 = -7.72156649015328655494e-02, - s1 = 2.14982415960608852501e-01, - s2 = 3.25778796408930981787e-01, - s3 = 1.46350472652464452805e-01, - s4 = 2.66422703033638609560e-02, - s5 = 1.84028451407337715652e-03, - s6 = 3.19475326584100867617e-05, - r1 = 1.39200533467621045958e+00, - r2 = 7.21935547567138069525e-01, - r3 = 1.71933865632803078993e-01, - r4 = 1.86459191715652901344e-02, - r5 = 7.77942496381893596434e-04, - r6 = 7.32668430744625636189e-06, - w0 = 4.18938533204672725052e-01, - w1 = 8.33333333333329678849e-02, - w2 = -2.77777777728775536470e-03, - w3 = 7.93650558643019558500e-04, - w4 = -5.95187557450339963135e-04, - w5 = 8.36339918996282139126e-04, - w6 = -1.63092934096575273989e-03; + private static final double zero = 0.0, one = 1.0, half = .5, a0 = 7.72156649015328655494e-02, a1 = 3.22467033424113591611e-01, a2 = 6.73523010531292681824e-02, a3 = 2.05808084325167332806e-02, a4 = 7.38555086081402883957e-03, a5 = 2.89051383673415629091e-03, a6 = 1.19270763183362067845e-03, a7 = 5.10069792153511336608e-04, a8 = 2.20862790713908385557e-04, a9 = 1.08011567247583939954e-04, a10 = 2.52144565451257326939e-05, a11 = 4.48640949618915160150e-05, tc = 1.46163214496836224576e+00, tf = -1.21486290535849611461e-01, tt = -3.63867699703950536541e-18, t0 = 4.83836122723810047042e-01, t1 = -1.47587722994593911752e-01, t2 = 6.46249402391333854778e-02, t3 = -3.27885410759859649565e-02, t4 = 1.79706750811820387126e-02, t5 = -1.03142241298341437450e-02, t6 = 6.10053870246291332635e-03, t7 = -3.68452016781138256760e-03, t8 = 2.25964780900612472250e-03, t9 = -1.40346469989232843813e-03, t10 = 8.81081882437654011382e-04, t11 = -5.38595305356740546715e-04, t12 = 3.15632070903625950361e-04, t13 = -3.12754168375120860518e-04, t14 = 3.35529192635519073543e-04, u0 = -7.72156649015328655494e-02, u1 = 6.32827064025093366517e-01, u2 = 1.45492250137234768737e+00, u3 = 9.77717527963372745603e-01, u4 = 2.28963728064692451092e-01, u5 = 1.33810918536787660377e-02, v1 = 2.45597793713041134822e+00, v2 = 2.12848976379893395361e+00, v3 = 7.69285150456672783825e-01, v4 = 1.04222645593369134254e-01, v5 = 3.21709242282423911810e-03, s0 = -7.72156649015328655494e-02, s1 = 2.14982415960608852501e-01, s2 = 3.25778796408930981787e-01, s3 = 1.46350472652464452805e-01, s4 = 2.66422703033638609560e-02, s5 = 1.84028451407337715652e-03, s6 = 3.19475326584100867617e-05, r1 = 1.39200533467621045958e+00, r2 = 7.21935547567138069525e-01, r3 = 1.71933865632803078993e-01, r4 = 1.86459191715652901344e-02, r5 = 7.77942496381893596434e-04, r6 = 7.32668430744625636189e-06, w0 = 4.18938533204672725052e-01, w1 = 8.33333333333329678849e-02, w2 = -2.77777777728775536470e-03, w3 = 7.93650558643019558500e-04, w4 = -5.95187557450339963135e-04, w5 = 8.36339918996282139126e-04, w6 = -1.63092934096575273989e-03; /** * Efficient rounding functions to simplify the log gamma function calculation @@ -1393,14 +1344,17 @@ public class MathUtils { /* purge off +-inf, NaN, +-0, and negative arguments */ int ix = hx & 0x7fffffff; - if (ix >= 0x7ff00000) return Double.POSITIVE_INFINITY; - if ((ix | lx) == 0 || hx < 0) return Double.NaN; + if (ix >= 0x7ff00000) + return Double.POSITIVE_INFINITY; + if ((ix | lx) == 0 || hx < 0) + return Double.NaN; if (ix < 0x3b900000) { /* |x|<2**-70, return -log(|x|) */ return -Math.log(x); } /* purge off 1 and 2 */ - if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0)) r = 0; + if ((((ix - 0x3ff00000) | lx) == 0) || (((ix - 0x40000000) | lx) == 0)) + r = 0; /* for x < 2.0 */ else if (ix < 0x40000000) { if (ix <= 0x3feccccc) { /* lgamma(x) = lgamma(x+1)-log(x) */ @@ -1408,22 +1362,27 @@ public class MathUtils { if (ix >= 0x3FE76944) { y = one - x; i = 0; - } else if (ix >= 0x3FCDA661) { + } + else if (ix >= 0x3FCDA661) { y = x - (tc - one); i = 1; - } else { + } + else { y = x; i = 2; } - } else { + } + else { r = zero; if (ix >= 0x3FFBB4C3) { y = 2.0 - x; i = 0; - } /* [1.7316,2] */ else if (ix >= 0x3FF3B4C4) { + } /* [1.7316,2] */ + else if (ix >= 0x3FF3B4C4) { y = x - tc; i = 1; - } /* [1.23,1.73] */ else { + } /* [1.23,1.73] */ + else { y = x - one; i = 2; } @@ -1451,7 +1410,8 @@ public class MathUtils { p2 = one + y * (v1 + y * (v2 + y * (v3 + y * (v4 + y * v5)))); r += (-0.5 * y + p1 / p2); } - } else if (ix < 0x40200000) { /* x < 8.0 */ + } + else if (ix < 0x40200000) { /* x < 8.0 */ i = (int) x; t = zero; y = x - (double) i; @@ -1474,13 +1434,15 @@ public class MathUtils { break; } /* 8.0 <= x < 2**58 */ - } else if (ix < 0x43900000) { + } + else if (ix < 0x43900000) { t = Math.log(x); z = one / x; y = z * z; w = w0 + z * (w1 + y * (w2 + y * (w3 + y * (w4 + y * (w5 + y * w6))))); r = (x - half) * (t - one) + w; - } else + } + else /* 2**58 <= x <= inf */ r = x * (Math.log(x) - one); return r; @@ -1515,7 +1477,6 @@ public class MathUtils { return log10BinomialCoefficient(n, k) + log10p * k + log10OneMinusP * (n - k); } - /** * Calculates the log10 of the multinomial coefficient. Designed to prevent * overflows even with very large numbers. @@ -1559,7 +1520,6 @@ public class MathUtils { return log10Gamma(x + 1); } - /** * Adds two arrays together and returns a new array with the sum. * @@ -1597,17 +1557,18 @@ public class MathUtils { /** * Vector operations + * * @param v1 first numerical array * @param v2 second numerical array - * @return a new array with the elements added + * @return a new array with the elements added */ public static Double[] vectorSum(E v1[], E v2[]) { if (v1.length != v2.length) throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()"); Double[] result = new Double[v1.length]; - for (int k=0; k < v1.length; k++) - result[k] = v1[k].doubleValue()+v2[k].doubleValue(); + for (int k = 0; k < v1.length; k++) + result[k] = v1[k].doubleValue() + v2[k].doubleValue(); return result; } @@ -1615,19 +1576,19 @@ public class MathUtils { public static Double[] scalarTimesVector(E a, E[] v1) { Double result[] = new Double[v1.length]; - for (int k=0; k < v1.length; k++) - result[k] = a.doubleValue()*v1[k].doubleValue(); + for (int k = 0; k < v1.length; k++) + result[k] = a.doubleValue() * v1[k].doubleValue(); return result; } - public static Double dotProduct(E[] v1, E[] v2) { + public static Double dotProduct(E[] v1, E[] v2) { if (v1.length != v2.length) throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()"); Double result = 0.0; - for (int k=0; k < v1.length; k++) - result += v1[k].doubleValue() *v2[k].doubleValue(); + for (int k = 0; k < v1.length; k++) + result += v1[k].doubleValue() * v2[k].doubleValue(); return result; @@ -1635,7 +1596,7 @@ public class MathUtils { public static double[] vectorLog10(double v1[]) { double result[] = new double[v1.length]; - for (int k=0; k < v1.length; k++) + for (int k = 0; k < v1.length; k++) result[k] = Math.log10(v1[k]); return result; @@ -1645,7 +1606,7 @@ public class MathUtils { // todo - silly overloading, just because Java can't unbox/box arrays of primitive types, and we can't do generics with primitive types! public static Double[] vectorLog10(Double v1[]) { Double result[] = new Double[v1.length]; - for (int k=0; k < v1.length; k++) + for (int k = 0; k < v1.length; k++) result[k] = Math.log10(v1[k]); return result;