diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index ce5f94478..360f88e51 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -147,9 +147,17 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC // if we only want variants, then we don't need to calculate genotype likelihoods if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) return builder.make(); + // if user requires all PLs at all sites, add all possible alt alleles + else if (UAC.annotateAllSitesWithPLs) { + for ( final byte base : BaseUtils.BASES ) { + if ( base != refBase ) + alleles.add(Allele.create(base)); + } + } - // otherwise, choose any alternate allele (it doesn't really matter) - alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 1 : 0))); + else + // otherwise, choose any alternate allele (it doesn't really matter) + alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 1 : 0))); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e346b10b7..b96b5733f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -52,6 +52,9 @@ import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.VariantContext; +import java.util.Collections; +import java.util.List; + public class UnifiedArgumentCollection extends StandardCallerArgumentCollection { @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false) @@ -95,6 +98,18 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false) public Double MAX_DELETION_FRACTION = 0.05; + /** + * Advanced, experimental argument: if SNP likelihood model is specified, and if EMIT_ALL_SITES output mode is set, when we set this argument then we will also emit PLs at all sites. + * This will give a measure of reference confidence and a measure of which alt alleles are more plausible (if any). + * WARNINGS: + * - This feature will inflate VCF file size considerably. + * - All SNP ALT alleles will be emitted with corresponding 10 PL values. + * - An error will be emitted if EMIT_ALL_SITES is not set, or if anything other than diploid SNP model is used + */ + @Advanced + @Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false) + public boolean annotateAllSitesWithPLs = false; + // indel-related arguments /** * A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site. @@ -247,7 +262,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection this.EXCLUDE_FILTERED_REFERENCE_SITES = uac.EXCLUDE_FILTERED_REFERENCE_SITES; this.IGNORE_LANE_INFO = uac.IGNORE_LANE_INFO; this.pairHMM = uac.pairHMM; - + this.annotateAllSitesWithPLs = uac.annotateAllSitesWithPLs; // todo- arguments to remove this.IGNORE_SNP_ALLELES = uac.IGNORE_SNP_ALLELES; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3d9f75d45..9f3368cf8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -168,6 +168,13 @@ public class UnifiedGenotyperEngine { filter.add(LOW_QUAL_FILTER_NAME); determineGLModelsToUse(); + + // do argument checking + if (UAC.annotateAllSitesWithPLs) { + if (!modelsToUse.contains(GenotypeLikelihoodsCalculationModel.Model.SNP)) + throw new IllegalArgumentException("Invalid genotype likelihood model specification: Only diploid SNP model can be used in conjunction with option allSitePLs"); + + } } /** @@ -439,7 +446,8 @@ public class UnifiedGenotyperEngine { bestGuessIsRef = false; } // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele - else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES || + UAC.annotateAllSitesWithPLs) { myAlleles.add(alternateAllele); alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele)); } @@ -449,7 +457,7 @@ public class UnifiedGenotyperEngine { // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice final double phredScaledConfidence = - Math.abs(! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES + Math.abs(! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES || UAC.annotateAllSitesWithPLs ? -10 * AFresult.getLog10PosteriorOfAFEq0() : -10 * AFresult.getLog10PosteriorOfAFGT0()); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 300d7f5da..3eb9b4e1c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -156,6 +156,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { } + @Test + public void emitPLsAtAllSites() { + WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1, + Arrays.asList("7cc55db8693759e059a05bc4398f6f69")); + executeTest("test all site PLs 1", spec1); + + } // -------------------------------------------------------------------------------------------------------------- // // testing heterozygosity diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala index 5d887016e..7c4c3f26a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala @@ -52,6 +52,5 @@ class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaC override def commandLine = super.commandLine + required("SUMMARY_OUTPUT=" + output) + required("CHART_OUTPUT=" + output+".pdf") + - required("REFERENCE_SEQUENCE=" + reference) + - required("ASSUME_SORTED=true") + required("REFERENCE_SEQUENCE=" + reference) }