diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 0d969c989..2db22679a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -60,24 +60,12 @@ public class GATKReportColumn extends LinkedHashMap { if ( format.equals("") ) { this.format = "%s"; this.dataType = GATKReportDataType.Unknown; - if ( defaultValue != null ) { - this.defaultValue = defaultValue; - //this.dataType = GATKReportDataType.fromObject(defaultValue); - } - else { - this.defaultValue = ""; - //this.dataType = GATKReportDataType.Unknown; - } + this.defaultValue = (defaultValue != null) ? defaultValue : ""; } else { this.format = format; this.dataType = GATKReportDataType.fromFormatString(format); - if ( defaultValue == null ) { - this.defaultValue = dataType.getDefaultValue(); - } - else { - this.defaultValue = defaultValue; - } + this.defaultValue = (defaultValue != null) ? defaultValue : dataType.getDefaultValue(); } } @@ -225,8 +213,10 @@ public class GATKReportColumn extends LinkedHashMap { public Object put(Object key, Object value) { if (value != null) { String formatted = formatValue(value); - updateMaxWidth(formatted); - updateFormat(formatted); + if (!formatted.equals("")) { + updateMaxWidth(formatted); + updateFormat(formatted); + } } return super.put(key, value); } @@ -236,7 +226,7 @@ public class GATKReportColumn extends LinkedHashMap { } private void updateFormat(String formatted) { - if (!isRightAlign(formatted)) - alignment = GATKReportColumnFormat.Alignment.LEFT; + if (alignment == GATKReportColumnFormat.Alignment.RIGHT) + alignment = isRightAlign(formatted) ? GATKReportColumnFormat.Alignment.RIGHT : GATKReportColumnFormat.Alignment.LEFT; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java index 8e8523e88..23238631c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDataManager.java @@ -313,7 +313,7 @@ public class RecalDataManager { * @param read The read to adjust * @param RAC The list of shared command line arguments */ - public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) { + public static void parsePlatformForRead(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) { GATKSAMReadGroupRecord readGroup = read.getReadGroup(); if (RAC.FORCE_PLATFORM != null && (readGroup.getPlatform() == null || !readGroup.getPlatform().equals(RAC.FORCE_PLATFORM))) { @@ -337,47 +337,47 @@ public class RecalDataManager { } /** - * Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space + * Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are + * inconsistent with the color space. If there is no call in the color space, this method returns true meaning + * this read should be skipped * - * @param read The SAMRecord to parse + * @param strategy the strategy used for SOLID no calls + * @param read The SAMRecord to parse + * @return whether or not this read should be skipped */ - public static void parseColorSpace(final GATKSAMRecord read) { - - // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base - if (ReadUtils.isSOLiDRead(read)) { - if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read + public static boolean checkColorSpace(final SOLID_NOCALL_STRATEGY strategy, final GATKSAMRecord read) { + if (ReadUtils.isSOLiDRead(read)) { // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base + if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); if (attr != null) { byte[] colorSpace; - if (attr instanceof String) { + if (attr instanceof String) colorSpace = ((String) attr).getBytes(); - } - else { + else throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); - } - - // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read - byte[] readBases = read.getReadBases(); - if (read.getReadNegativeStrandFlag()) { + + byte[] readBases = read.getReadBases(); // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read + if (read.getReadNegativeStrandFlag()) readBases = BaseUtils.simpleReverseComplement(read.getReadBases()); - } + final byte[] inconsistency = new byte[readBases.length]; - int iii; - byte prevBase = colorSpace[0]; // The sentinel - for (iii = 0; iii < readBases.length; iii++) { - final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[iii + 1]); - inconsistency[iii] = (byte) (thisBase == readBases[iii] ? 0 : 1); - prevBase = readBases[iii]; + int i; + byte prevBase = colorSpace[0]; // The sentinel + for (i = 0; i < readBases.length; i++) { + final byte thisBase = getNextBaseFromColor(read, prevBase, colorSpace[i + 1]); + inconsistency[i] = (byte) (thisBase == readBases[i] ? 0 : 1); + prevBase = readBases[i]; } read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency); + } + else if (strategy == SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) // if the strategy calls for an exception, throw it + throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); - } - else { - throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() + - " Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias."); - } + else + return false; // otherwise, just skip the read } } + return true; } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java index d197cc6b6..dde805e8d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalDatum.java @@ -109,7 +109,6 @@ public class RecalDatum extends RecalDatumOptimized { public final void resetCalculatedQualities() { empiricalQuality = 0.0; - estimatedQReported = 0.0; } private double calcExpectedErrors() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java index 897e1645d..e7a698904 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationReport.java @@ -24,7 +24,7 @@ public class RecalibrationReport { GATKReportTable argumentTable; // keep the argument table untouched just for output purposes RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter | todo -- this should be a new parameter, not necessarily coming from the original table parameter list - private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check that needs propagate through the code"; + private static String UNRECOGNIZED_REPORT_TABLE_EXCEPTION = "Unrecognized table. Did you add an extra required covariate? This is a hard check."; public RecalibrationReport(final File RECAL_FILE) { GATKReport report = new GATKReport(RECAL_FILE); @@ -77,10 +77,11 @@ public class RecalibrationReport { /** * Combines two recalibration reports by adding all observations and errors * - * Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate them - * after combining. The reason for not calculating it is because this function is inteded for combining a series of - * recalibration reports, and it only makes sense to calculate the empirical qualities and quantized qualities after all - * the recalibration reports have been combined. Having the user recalculate when appropriate, makes this method faster + * Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate + * them after combining. The reason for not calculating it is because this function is inteded for combining a + * series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized + * qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate, + * makes this method faster * * Note2: The empirical quality reported, however, is recalculated given its simplicity. * @@ -97,7 +98,7 @@ public class RecalibrationReport { if (thisDatum == null) thisDatum = otherDatum; // sometimes the datum in other won't be present in 'this'. So just assign it! else - thisDatum.increment(otherDatum); // add the two datum objects into 'this' + thisDatum.combine(otherDatum); // add the two datum objects into 'this' thisDatum.resetCalculatedQualities(); // reset the empirical quality to make sure the user doesn't forget to recalculate it } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index bf482f5d7..65452f32b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -222,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = getHeaderInfo(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 62ffc1212..a1e4be786 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -51,6 +51,8 @@ import java.util.*; public class UnifiedGenotyperEngine { public static final String LOW_QUAL_FILTER_NAME = "LowQual"; + + public static final int DEFAULT_PLOIDY = 2; public enum OUTPUT_MODE { /** produces calls only at variant sites */ @@ -95,7 +97,8 @@ public class UnifiedGenotyperEngine { private final Logger logger; private final PrintStream verboseWriter; - // number of chromosomes (2 * samples) in input + // number of chromosomes (ploidy * samples) in input + private final int ploidy; private final int N; // the standard filter to use for calls below the confidence threshold but above the emit threshold @@ -112,11 +115,11 @@ public class UnifiedGenotyperEngine { // --------------------------------------------------------------------------------------------------------- @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { - this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), 2*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size())); + this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size())); } - @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","N>0"}) - public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set samples, int N) { + @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"}) + public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set samples, int ploidy) { this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; genomeLocParser = toolkit.getGenomeLocParser(); this.samples = new TreeSet(samples); @@ -127,7 +130,8 @@ public class UnifiedGenotyperEngine { this.verboseWriter = verboseWriter; this.annotationEngine = engine; - this.N = N; + this.ploidy = ploidy; + this.N = samples.size() * ploidy; log10AlleleFrequencyPriorsSNPs = new double[N+1]; log10AlleleFrequencyPriorsIndels = new double[N+1]; computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java index bded9001e..fe83dce22 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRGathererUnitTest.java @@ -60,7 +60,7 @@ public class BQSRGathererUnitTest { * @param factor 1 to test for equality, any other value to multiply the original value and match with the calculated */ private void testTablesWithColumnsAndFactor(GATKReportTable original, GATKReportTable calculated, List columnsToTest, int factor) { - for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys + for (Object primaryKey : original.getPrimaryKeys()) { // tables don't necessarily have the same primary keys for (String column : columnsToTest) { Object actual = calculated.get(primaryKey, column); Object expected = original.get(primaryKey, column);