Merge pull request #288 from broadinstitute/gda_more_ancient_dna_fixes

Feature requested by Reich lab and Paavo lab in Leipzig for ancient DNA ...
This commit is contained in:
delangel 2013-06-17 13:04:21 -07:00
commit a6a58cbc78
5 changed files with 45 additions and 7 deletions

View File

@ -147,9 +147,17 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
// if we only want variants, then we don't need to calculate genotype likelihoods
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
return builder.make();
// if user requires all PLs at all sites, add all possible alt alleles
else if (UAC.annotateAllSitesWithPLs) {
for ( final byte base : BaseUtils.BASES ) {
if ( base != refBase )
alleles.add(Allele.create(base));
}
}
// otherwise, choose any alternate allele (it doesn't really matter)
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 1 : 0)));
else
// otherwise, choose any alternate allele (it doesn't really matter)
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 1 : 0)));
}
}

View File

@ -52,6 +52,9 @@ import org.broadinstitute.sting.utils.pairhmm.PairHMM;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.Collections;
import java.util.List;
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
@ -95,6 +98,18 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
public Double MAX_DELETION_FRACTION = 0.05;
/**
* Advanced, experimental argument: if SNP likelihood model is specified, and if EMIT_ALL_SITES output mode is set, when we set this argument then we will also emit PLs at all sites.
* This will give a measure of reference confidence and a measure of which alt alleles are more plausible (if any).
* WARNINGS:
* - This feature will inflate VCF file size considerably.
* - All SNP ALT alleles will be emitted with corresponding 10 PL values.
* - An error will be emitted if EMIT_ALL_SITES is not set, or if anything other than diploid SNP model is used
*/
@Advanced
@Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false)
public boolean annotateAllSitesWithPLs = false;
// indel-related arguments
/**
* A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site.
@ -247,7 +262,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
this.EXCLUDE_FILTERED_REFERENCE_SITES = uac.EXCLUDE_FILTERED_REFERENCE_SITES;
this.IGNORE_LANE_INFO = uac.IGNORE_LANE_INFO;
this.pairHMM = uac.pairHMM;
this.annotateAllSitesWithPLs = uac.annotateAllSitesWithPLs;
// todo- arguments to remove
this.IGNORE_SNP_ALLELES = uac.IGNORE_SNP_ALLELES;
}

View File

@ -168,6 +168,13 @@ public class UnifiedGenotyperEngine {
filter.add(LOW_QUAL_FILTER_NAME);
determineGLModelsToUse();
// do argument checking
if (UAC.annotateAllSitesWithPLs) {
if (!modelsToUse.contains(GenotypeLikelihoodsCalculationModel.Model.SNP))
throw new IllegalArgumentException("Invalid genotype likelihood model specification: Only diploid SNP model can be used in conjunction with option allSitePLs");
}
}
/**
@ -439,7 +446,8 @@ public class UnifiedGenotyperEngine {
bestGuessIsRef = false;
}
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ||
UAC.annotateAllSitesWithPLs) {
myAlleles.add(alternateAllele);
alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
}
@ -449,7 +457,7 @@ public class UnifiedGenotyperEngine {
// note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
final double phredScaledConfidence =
Math.abs(! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
Math.abs(! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES || UAC.annotateAllSitesWithPLs
? -10 * AFresult.getLog10PosteriorOfAFEq0()
: -10 * AFresult.getLog10PosteriorOfAFGT0());

View File

@ -156,6 +156,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
}
@Test
public void emitPLsAtAllSites() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1,
Arrays.asList("7cc55db8693759e059a05bc4398f6f69"));
executeTest("test all site PLs 1", spec1);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing heterozygosity

View File

@ -52,6 +52,5 @@ class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaC
override def commandLine = super.commandLine +
required("SUMMARY_OUTPUT=" + output) +
required("CHART_OUTPUT=" + output+".pdf") +
required("REFERENCE_SEQUENCE=" + reference) +
required("ASSUME_SORTED=true")
required("REFERENCE_SEQUENCE=" + reference)
}