From 68d0e4dd6d27d232532e44e45e275f1662afe846 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Tue, 17 Jul 2012 16:28:11 +0200 Subject: [PATCH 1/4] - Multi-allelic sites are now correctly ignored - Reporting of mendelian violations enhanced - Corrected TP overflow by caping it to Bye.MAX_VALUE -Updated integrationtests to reflect changes in MVF file output Signed-off-by: Eric Banks --- .../walkers/phasing/PhaseByTransmission.java | 25 ++++++++++--------- .../PhaseByTransmissionIntegrationTest.java | 12 ++++----- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index aa4b4ab78..4760893ca 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -342,9 +342,10 @@ public class PhaseByTransmission extends RodWalker, HashMa private Genotype getPhasedGenotype(Allele refAllele, Allele altAllele, Genotype genotype, double transmissionProb, Genotype phasedGenotype){ int phredScoreTransmission = -1; - if(transmissionProb != NO_TRANSMISSION_PROB) - phredScoreTransmission = MathUtils.probabilityToPhredScale(1-(transmissionProb)); - + if(transmissionProb != NO_TRANSMISSION_PROB){ + double dphredScoreTransmission = MathUtils.log10ProbabilityToPhredScale(Math.log10(1-(transmissionProb))); + phredScoreTransmission = dphredScoreTransmission < Byte.MAX_VALUE ? (byte)dphredScoreTransmission : Byte.MAX_VALUE; + } //Handle null, missing and unavailable genotypes //Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable //genotype so it is safe to return the original genotype in this case. @@ -410,7 +411,7 @@ public class PhaseByTransmission extends RodWalker, HashMa buildMatrices(); if(mvFile != null) - mvFile.println("#CHROM\tPOS\tFILTER\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_RAD\tMOTHER_AAD\tMOTHER_HRPL\tMOTHER_HETPL\tMOTHER_HAPL\tFATHER_GT\tFATHER_DP\tFATHER_RAD\tFATHER_AAD\tFATHER_HRPL\tFATHER_HETPL\tFATHER_HAPL\tCHILD_GT\tCHILD_DP\tCHILD_RAD\tCHILD_AAD\tCHILD_HRPL\tCHILD_HETPL\tCHILD_HAPL"); + mvFile.println("CHROM\tPOS\tAC\tFAMILY\tTP\tMOTHER_GT\tMOTHER_DP\tMOTHER_AD\tMOTHER_PL\tFATHER_GT\tFATHER_DP\tFATHER_AD\tFATHER_PL\tCHILD_GT\tCHILD_DP\tCHILD_AD\tCHILD_PL"); } @@ -776,7 +777,7 @@ public class PhaseByTransmission extends RodWalker, HashMa return metricsCounters; final VariantContext vc = tracker.getFirstValue(variantCollection.variants, context.getLocation()); - if (vc == null) + if (vc == null || !vc.isBiallelic()) return metricsCounters; final VariantContextBuilder builder = new VariantContextBuilder(vc); @@ -805,8 +806,8 @@ public class PhaseByTransmission extends RodWalker, HashMa if(father != null){ genotypesContext.replace(phasedFather); updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters); - mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t%s:%s:%s:%s\t%s:%s:%s:%s", - vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(), + mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", + vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(), phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()), phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(), phasedChild.getGenotypeString(),Arrays.asList(phasedChild.getDP()),phasedChild.getAD(),phasedChild.getLikelihoodsString()); @@ -817,8 +818,8 @@ public class PhaseByTransmission extends RodWalker, HashMa updatePairMetricsCounters(phasedMother,phasedChild,mvCount,metricsCounters); if(!(phasedMother.getType()==mother.getType() && phasedChild.getType()==child.getType())) metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); - mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s:%s:%s:%s\t.:.:.:.\t%s:%s:%s:%s", - vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(), + mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s:%s:%s:%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s", + vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(), phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(), phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString()); } @@ -828,15 +829,15 @@ public class PhaseByTransmission extends RodWalker, HashMa updatePairMetricsCounters(phasedFather,phasedChild,mvCount,metricsCounters); if(!(phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType())) metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1); - mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t.:.:.:.\t%s:%s:%s:%s\t%s:%s:%s:%s", - vc.getChr(),vc.getStart(),vc.getFilters(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.toString(), + mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s", + vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(), phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(), phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString()); } //Report violation if set so //TODO: ADAPT FOR PAIRS TOO!! - if(mvCount>0 && mvFile != null) + if(mvCount>0 && mvFile != null && !vc.isFiltered()) mvFile.println(mvfLine); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java index 19d1e4cb3..9b0fbf650 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java @@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","f8721f4f5d3bae2848ae15c3f120709b") + Arrays.asList("f4b0b5471e03306ee2fad27d88b217b6","f8721f4f5d3bae2848ae15c3f120709b") ); executeTest("testTrueNegativeMV", spec); } @@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","547fdfef393f3045a96d245ef6af8acb") + Arrays.asList("dbc64776dcc9e01a468b61e4e0db8277","547fdfef393f3045a96d245ef6af8acb") ); executeTest("testTruePositiveMV", spec); } @@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("719d681bb0a52a40bc854bba107c5c94","9529e2bf214d72e792d93fbea22a3b91") + Arrays.asList("37793e78861bb0bc070884da67dc10e6","9529e2bf214d72e792d93fbea22a3b91") ); executeTest("testFalsePositiveMV", spec); } @@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","8c157d79dd00063d2932f0d2b96f53d8") + Arrays.asList("e4da7639bb542d6440975da12b94973f","8c157d79dd00063d2932f0d2b96f53d8") ); executeTest("testSpecialCases", spec); } @@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-o %s" ), 2, - Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","343e418850ae4a687ebef2acd55fcb07") + Arrays.asList("ab92b714471a000285577d540e1fdc2e","343e418850ae4a687ebef2acd55fcb07") ); executeTest("testPriorOption", spec); } @@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest { "-fatherAlleleFirst" ), 2, - Arrays.asList("60ced3d078792a150a03640b62926857","52ffa82428e63ade22ea37b72ae58492") + Arrays.asList("4b937c1b4e96602a7479b07b59254d06","52ffa82428e63ade22ea37b72ae58492") ); executeTest("testFatherAlleleFirst", spec); } From 4e3780fd4fa814328aead4d2ff5d08647b73796f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 17 Jul 2012 15:47:43 -0400 Subject: [PATCH 2/4] Updated md5 for PBPP --- .../sting/queue/pipeline/PacbioProcessingPipelineTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala index 0989f8d24..6e4c5ed3e 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala @@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest { " -blasr ", " -test ", " -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString - spec.fileMD5s += testOut -> "cf147e7f56806598371f8d5d6794b852" + spec.fileMD5s += testOut -> "2f2026882a2850bb14a858524158d5a8" PipelineTest.executeTest(spec) } } From 8dbc9cb29c1ef1ae0c46face28dc4d943ba1c863 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 17 Jul 2012 15:52:56 -0400 Subject: [PATCH 3/4] Add the ability to emit the original quals in the OQ tag --- .../gatk/walkers/bqsr/BQSRIntegrationTest.java | 3 ++- .../sting/gatk/GenomeAnalysisEngine.java | 6 +++--- .../gatk/arguments/GATKArgumentCollection.java | 6 ++++++ .../utils/recalibration/BaseRecalibration.java | 15 ++++++++++++++- 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index b9ea26e64..93dab8505 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -107,7 +107,8 @@ public class BQSRIntegrationTest extends WalkerTest { {new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")}, {new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")}, {new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")}, - {new PRTest(" -DIQ", "66aa65223f192ee39c1773aa187fd493")} + {new PRTest(" -DIQ", "66aa65223f192ee39c1773aa187fd493")}, + {new PRTest(" -EOQ", "")} }; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 5f4f66c89..9d2a138fa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -198,8 +198,8 @@ public class GenomeAnalysisEngine { private BaseRecalibration baseRecalibration = null; public BaseRecalibration getBaseRecalibration() { return baseRecalibration; } public boolean hasBaseRecalibration() { return baseRecalibration != null; } - public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) { - baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan); + public void setBaseRecalibration(final File recalFile, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals) { + baseRecalibration = new BaseRecalibration(recalFile, quantizationLevels, disableIndelQuals, preserveQLessThan, emitOriginalQuals); } /** @@ -239,7 +239,7 @@ public class GenomeAnalysisEngine { // if the use specified an input BQSR recalibration table then enable on the fly recalibration if (args.BQSR_RECAL_FILE != null) - setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN); + setBaseRecalibration(args.BQSR_RECAL_FILE, args.quantizationLevels, args.disableIndelQuals, args.PRESERVE_QSCORES_LESS_THAN, args.emitOriginalQuals); // Determine how the threads should be divided between CPU vs. IO. determineThreadAllocation(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index e31af0b50..45499817e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -220,6 +220,12 @@ public class GATKArgumentCollection { @Argument(fullName="disable_indel_quals", shortName = "DIQ", doc = "If true, disables printing of base insertion and base deletion tags (with -BQSR)", required=false) public boolean disableIndelQuals = false; + /** + * By default, the OQ tag in not emitted when using the -BQSR argument. + */ + @Argument(fullName="emit_original_quals", shortName = "EOQ", doc = "If true, enables printing of the OQ tag with the original base qualities (with -BQSR)", required=false) + public boolean emitOriginalQuals = false; + /** * Do not modify quality scores less than this value but rather just write them out unmodified in the recalibrated BAM file. * In general it's unsafe to change qualities scores below < 6, since base callers use these values to indicate random or bad bases. diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java index 074186305..b5f7ad046 100644 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java @@ -25,11 +25,14 @@ package org.broadinstitute.sting.utils.recalibration; +import net.sf.samtools.SAMTag; +import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.gatk.walkers.bqsr.*; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedHashMap; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; @@ -51,6 +54,7 @@ public class BaseRecalibration { private final boolean disableIndelQuals; private final int preserveQLessThan; + private final boolean emitOriginalQuals; private static final NestedHashMap[] qualityScoreByFullCovariateKey = new NestedHashMap[EventType.values().length]; // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values. static { @@ -66,7 +70,7 @@ public class BaseRecalibration { * @param disableIndelQuals if true, do not emit base indel qualities * @param preserveQLessThan preserve quality scores less than this value */ - public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan) { + public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals) { RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE); recalibrationTables = recalibrationReport.getRecalibrationTables(); @@ -80,6 +84,7 @@ public class BaseRecalibration { readCovariates = new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length); this.disableIndelQuals = disableIndelQuals; this.preserveQLessThan = preserveQLessThan; + this.emitOriginalQuals = emitOriginalQuals; } /** @@ -90,6 +95,14 @@ public class BaseRecalibration { * @param read the read to recalibrate */ public void recalibrateRead(final GATKSAMRecord read) { + if (emitOriginalQuals && read.getAttribute(SAMTag.OQ.name()) == null) { // Save the old qualities if the tag isn't already taken in the read + try { + read.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(read.getBaseQualities())); + } catch (IllegalArgumentException e) { + throw new UserException.MalformedBAM(read, "illegal base quality encountered; " + e.getMessage()); + } + } + RecalDataManager.computeCovariates(read, requestedCovariates, readCovariates); // compute all covariates for the read for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) { From 33be41ecf50012bf8234a015d22799cf60182b8e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 17 Jul 2012 16:06:04 -0400 Subject: [PATCH 4/4] Cleaning up integration test --- .../sting/gatk/walkers/bqsr/BQSRIntegrationTest.java | 3 +-- .../sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index 93dab8505..b9ea26e64 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -107,8 +107,7 @@ public class BQSRIntegrationTest extends WalkerTest { {new PRTest("", "d2d6ed8667cdba7e56f5db97d6262676")}, {new PRTest(" -qq -1", "b7053d3d67aba6d8892f0a60f0ded338")}, {new PRTest(" -qq 6", "bfbf0855185b2b70aa35237fb71e4487")}, - {new PRTest(" -DIQ", "66aa65223f192ee39c1773aa187fd493")}, - {new PRTest(" -EOQ", "")} + {new PRTest(" -DIQ", "66aa65223f192ee39c1773aa187fd493")} }; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java index 299daebd9..1a99d51e6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseQualityScoreRecalibrator.java @@ -185,7 +185,7 @@ public class BaseQualityScoreRecalibrator extends LocusWalker implem Class c = null; for ( Class REclass : REclasses ) { - if ( REclass.isAssignableFrom(ProtectedPackageSource.class) ) { + if ( ProtectedPackageSource.class.isAssignableFrom(REclass) ) { c = REclass; break; }