Merge pull request #288 from broadinstitute/gda_more_ancient_dna_fixes
Feature requested by Reich lab and Paavo lab in Leipzig for ancient DNA ...
This commit is contained in:
commit
a6a58cbc78
|
|
@ -147,9 +147,17 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
// if we only want variants, then we don't need to calculate genotype likelihoods
|
// if we only want variants, then we don't need to calculate genotype likelihoods
|
||||||
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
|
if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY )
|
||||||
return builder.make();
|
return builder.make();
|
||||||
|
// if user requires all PLs at all sites, add all possible alt alleles
|
||||||
|
else if (UAC.annotateAllSitesWithPLs) {
|
||||||
|
for ( final byte base : BaseUtils.BASES ) {
|
||||||
|
if ( base != refBase )
|
||||||
|
alleles.add(Allele.create(base));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// otherwise, choose any alternate allele (it doesn't really matter)
|
else
|
||||||
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 1 : 0)));
|
// otherwise, choose any alternate allele (it doesn't really matter)
|
||||||
|
alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(indexOfRefBase == 0 ? 1 : 0)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,9 @@ import org.broadinstitute.sting.utils.pairhmm.PairHMM;
|
||||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
|
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
|
||||||
|
|
||||||
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
|
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
|
||||||
|
|
@ -95,6 +98,18 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
||||||
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
|
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
|
||||||
public Double MAX_DELETION_FRACTION = 0.05;
|
public Double MAX_DELETION_FRACTION = 0.05;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Advanced, experimental argument: if SNP likelihood model is specified, and if EMIT_ALL_SITES output mode is set, when we set this argument then we will also emit PLs at all sites.
|
||||||
|
* This will give a measure of reference confidence and a measure of which alt alleles are more plausible (if any).
|
||||||
|
* WARNINGS:
|
||||||
|
* - This feature will inflate VCF file size considerably.
|
||||||
|
* - All SNP ALT alleles will be emitted with corresponding 10 PL values.
|
||||||
|
* - An error will be emitted if EMIT_ALL_SITES is not set, or if anything other than diploid SNP model is used
|
||||||
|
*/
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false)
|
||||||
|
public boolean annotateAllSitesWithPLs = false;
|
||||||
|
|
||||||
// indel-related arguments
|
// indel-related arguments
|
||||||
/**
|
/**
|
||||||
* A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site.
|
* A candidate indel is genotyped (and potentially called) if there are this number of reads with a consensus indel at a site.
|
||||||
|
|
@ -247,7 +262,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
||||||
this.EXCLUDE_FILTERED_REFERENCE_SITES = uac.EXCLUDE_FILTERED_REFERENCE_SITES;
|
this.EXCLUDE_FILTERED_REFERENCE_SITES = uac.EXCLUDE_FILTERED_REFERENCE_SITES;
|
||||||
this.IGNORE_LANE_INFO = uac.IGNORE_LANE_INFO;
|
this.IGNORE_LANE_INFO = uac.IGNORE_LANE_INFO;
|
||||||
this.pairHMM = uac.pairHMM;
|
this.pairHMM = uac.pairHMM;
|
||||||
|
this.annotateAllSitesWithPLs = uac.annotateAllSitesWithPLs;
|
||||||
// todo- arguments to remove
|
// todo- arguments to remove
|
||||||
this.IGNORE_SNP_ALLELES = uac.IGNORE_SNP_ALLELES;
|
this.IGNORE_SNP_ALLELES = uac.IGNORE_SNP_ALLELES;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -168,6 +168,13 @@ public class UnifiedGenotyperEngine {
|
||||||
filter.add(LOW_QUAL_FILTER_NAME);
|
filter.add(LOW_QUAL_FILTER_NAME);
|
||||||
|
|
||||||
determineGLModelsToUse();
|
determineGLModelsToUse();
|
||||||
|
|
||||||
|
// do argument checking
|
||||||
|
if (UAC.annotateAllSitesWithPLs) {
|
||||||
|
if (!modelsToUse.contains(GenotypeLikelihoodsCalculationModel.Model.SNP))
|
||||||
|
throw new IllegalArgumentException("Invalid genotype likelihood model specification: Only diploid SNP model can be used in conjunction with option allSitePLs");
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -439,7 +446,8 @@ public class UnifiedGenotyperEngine {
|
||||||
bestGuessIsRef = false;
|
bestGuessIsRef = false;
|
||||||
}
|
}
|
||||||
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
|
// if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele
|
||||||
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ||
|
||||||
|
UAC.annotateAllSitesWithPLs) {
|
||||||
myAlleles.add(alternateAllele);
|
myAlleles.add(alternateAllele);
|
||||||
alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
|
alleleCountsofMLE.add(AFresult.getAlleleCountAtMLE(alternateAllele));
|
||||||
}
|
}
|
||||||
|
|
@ -449,7 +457,7 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
// note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
|
// note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
|
||||||
final double phredScaledConfidence =
|
final double phredScaledConfidence =
|
||||||
Math.abs(! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
|
Math.abs(! bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES || UAC.annotateAllSitesWithPLs
|
||||||
? -10 * AFresult.getLog10PosteriorOfAFEq0()
|
? -10 * AFresult.getLog10PosteriorOfAFEq0()
|
||||||
: -10 * AFresult.getLog10PosteriorOfAFGT0());
|
: -10 * AFresult.getLog10PosteriorOfAFGT0());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -156,6 +156,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void emitPLsAtAllSites() {
|
||||||
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
|
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1,
|
||||||
|
Arrays.asList("7cc55db8693759e059a05bc4398f6f69"));
|
||||||
|
executeTest("test all site PLs 1", spec1);
|
||||||
|
|
||||||
|
}
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// testing heterozygosity
|
// testing heterozygosity
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,5 @@ class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaC
|
||||||
override def commandLine = super.commandLine +
|
override def commandLine = super.commandLine +
|
||||||
required("SUMMARY_OUTPUT=" + output) +
|
required("SUMMARY_OUTPUT=" + output) +
|
||||||
required("CHART_OUTPUT=" + output+".pdf") +
|
required("CHART_OUTPUT=" + output+".pdf") +
|
||||||
required("REFERENCE_SEQUENCE=" + reference) +
|
required("REFERENCE_SEQUENCE=" + reference)
|
||||||
required("ASSUME_SORTED=true")
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue