MaxAltAlleles now defaults to 6, no more MaxAltAllelesForIndels
-- Updated StandardCallerArgumentCollection to remove MaxAltAllelesForIndels. Previous argument is deprecated with meaningful doc message for people to use maxAltAlleles -- All constructores, factory methods, and test builders and their users updated to provide just a single argument -- Updating MD5s for integration tests that change due to genotyping more alleles -- Adding more alleles to genotyping results in slight changes in the QUAL value for multi-allelic loci where one or more alleles aren't polymorphic. That's simply due to the way that alternative hypotheses contribute as reference evidence against each true allele. The effect can be large (new qual = old qual / 2 in one case here). -- If we want more precision in our estimates we could decide (Eric, should we discuss?) to actually separately do a discovery phase in the genotyping, eliminate all variants not considered polymorphic, and then do a final round of calling to get the exact QUAL value for only those that are segregating. This would have the value of having the QUAL stay constant as more alleles are genotyped, at the cost of some code complexity increase and runtime. Might be worth it through
This commit is contained in:
parent
97dc3664c9
commit
90f59803fd
|
|
@ -54,7 +54,7 @@ public class AFCalcTestBuilder {
|
||||||
}
|
}
|
||||||
|
|
||||||
public AFCalc makeModel() {
|
public AFCalc makeModel() {
|
||||||
return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), getNumAltAlleles(), 2);
|
return AFCalcFactory.createAFCalc(modelType, nSamples, getNumAltAlleles(), 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[] makePriors() {
|
public double[] makePriors() {
|
||||||
|
|
|
||||||
|
|
@ -40,22 +40,20 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
||||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||||
private final static boolean VERBOSE = false;
|
private final static boolean VERBOSE = false;
|
||||||
|
|
||||||
protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
protected GeneralPloidyExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
super(nSamples, maxAltAlleles, ploidy);
|
||||||
this.ploidy = ploidy;
|
this.ploidy = ploidy;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected VariantContext reduceScope(VariantContext vc) {
|
protected VariantContext reduceScope(VariantContext vc) {
|
||||||
final int maxAltAlleles = vc.getType().equals(VariantContext.Type.INDEL) ? maxAlternateAllelesForIndels : maxAlternateAllelesToGenotype;
|
|
||||||
|
|
||||||
// don't try to genotype too many alternate alleles
|
// don't try to genotype too many alternate alleles
|
||||||
if ( vc.getAlternateAlleles().size() > maxAltAlleles) {
|
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles()) {
|
||||||
logger.warn("this tool is currently set to genotype at most " + maxAltAlleles + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||||
|
|
||||||
final List<Allele> alleles = new ArrayList<Allele>(maxAltAlleles + 1);
|
final List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1);
|
||||||
alleles.add(vc.getReference());
|
alleles.add(vc.getReference());
|
||||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, maxAltAlleles, ploidy));
|
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles(), ploidy));
|
||||||
|
|
||||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||||
builder.alleles(alleles);
|
builder.alleles(alleles);
|
||||||
|
|
|
||||||
|
|
@ -125,7 +125,7 @@ public class AFCalcUnitTest extends BaseTest {
|
||||||
final List<Genotype> triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2);
|
final List<Genotype> triAllelicSamples = Arrays.asList(AA2, AB2, BB2, AC2, BC2, CC2);
|
||||||
|
|
||||||
for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) {
|
for ( final int nSamples : Arrays.asList(1, 2, 3, 4) ) {
|
||||||
List<AFCalc> calcs = AFCalcFactory.createAFCalcs( Arrays.asList( AFCalcFactory.Calculation.values() ), 4, 2, 2, 2);
|
List<AFCalc> calcs = AFCalcFactory.createAFCalcs( Arrays.asList( AFCalcFactory.Calculation.values() ), 4, 2, 2);
|
||||||
|
|
||||||
final int nPriorValues = 2*nSamples+1;
|
final int nPriorValues = 2*nSamples+1;
|
||||||
final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
final double[] flatPriors = MathUtils.normalizeFromLog10(new double[nPriorValues], true); // flat priors
|
||||||
|
|
@ -215,7 +215,7 @@ public class AFCalcUnitTest extends BaseTest {
|
||||||
samples.addAll(Collections.nCopies(nNonInformative, testData.nonInformative));
|
samples.addAll(Collections.nCopies(nNonInformative, testData.nonInformative));
|
||||||
|
|
||||||
final int nSamples = samples.size();
|
final int nSamples = samples.size();
|
||||||
List<AFCalc> calcs = AFCalcFactory.createAFCalcs(Arrays.asList(AFCalcFactory.Calculation.values()), 4, 2, 2, 2);
|
List<AFCalc> calcs = AFCalcFactory.createAFCalcs(Arrays.asList(AFCalcFactory.Calculation.values()), 4, 2, 2);
|
||||||
|
|
||||||
final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors
|
final double[] priors = MathUtils.normalizeFromLog10(new double[2*nSamples+1], true); // flat priors
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -140,7 +140,7 @@ public class GeneralPloidyAFCalculationModelUnitTest extends BaseTest {
|
||||||
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
|
final int len = GeneralPloidyGenotypeLikelihoods.getNumLikelihoodElements(1 + cfg.numAltAlleles, cfg.ploidy * cfg.GLs.size());
|
||||||
double[] priors = new double[len]; // flat priors
|
double[] priors = new double[len]; // flat priors
|
||||||
|
|
||||||
final GeneralPloidyExactAFCalc calc = new GeneralPloidyExactAFCalc(cfg.GLs.size(), 1 + cfg.numAltAlleles, 1 + cfg.numAltAlleles, cfg.ploidy);
|
final GeneralPloidyExactAFCalc calc = new GeneralPloidyExactAFCalc(cfg.GLs.size(), 1 + cfg.numAltAlleles, cfg.ploidy);
|
||||||
calc.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors);
|
calc.combineSinglePools(cfg.GLs, 1 + cfg.numAltAlleles, cfg.ploidy, priors);
|
||||||
int nameIndex = 1;
|
int nameIndex = 1;
|
||||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleGGA() {
|
public void testHaplotypeCallerMultiSampleGGA() {
|
||||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles_for_indels 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "53caa950535749f99d3c5b9bb61c7b60");
|
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "53caa950535749f99d3c5b9bb61c7b60");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||||
|
|
|
||||||
|
|
@ -55,20 +55,25 @@ public class StandardCallerArgumentCollection {
|
||||||
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
||||||
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
||||||
* that you not play around with this parameter.
|
* that you not play around with this parameter.
|
||||||
|
*
|
||||||
|
* As of GATK 2.2 the genotyper can handle a very large number of events, so the default maximum has been increased to 6.
|
||||||
*/
|
*/
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
@Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false)
|
||||||
public int MAX_ALTERNATE_ALLELES = 3;
|
public int MAX_ALTERNATE_ALLELES = 6;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
|
* If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES),
|
||||||
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
* then only this many alleles will be used. Note that genotyping sites with many alternate alleles is both CPU and memory intensive and it
|
||||||
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
* scales exponentially based on the number of alternate alleles. Unless there is a good reason to change the default value, we highly recommend
|
||||||
* that you not play around with this parameter.
|
* that you not play around with this parameter.
|
||||||
|
*
|
||||||
|
* This argument has been retired in GATK 2.2. Please specify just maxAltAlleles from now on
|
||||||
*/
|
*/
|
||||||
@Advanced
|
@Deprecated
|
||||||
@Argument(fullName = "max_alternate_alleles_for_indels", shortName = "maxAltAllelesForIndels", doc = "Maximum number of alternate alleles to genotype for indels only", required = false)
|
@Hidden
|
||||||
public int MAX_ALTERNATE_ALLELES_FOR_INDELS = 2;
|
@Argument(fullName = "max_alternate_alleles_for_indels", shortName = "maxAltAllelesForIndels", doc = "This argument has been retired in GATK 2.2. Please specify just maxAltAlleles from now on, which will apply to any variant, regardless of type", required = false)
|
||||||
|
public int MAX_ALTERNATE_ALLELES_FOR_INDELS = -1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads.
|
* If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads.
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,6 @@ public abstract class AFCalc implements Cloneable {
|
||||||
|
|
||||||
protected final int nSamples;
|
protected final int nSamples;
|
||||||
protected final int maxAlternateAllelesToGenotype;
|
protected final int maxAlternateAllelesToGenotype;
|
||||||
protected final int maxAlternateAllelesForIndels;
|
|
||||||
|
|
||||||
protected Logger logger = defaultLogger;
|
protected Logger logger = defaultLogger;
|
||||||
|
|
||||||
|
|
@ -60,19 +59,16 @@ public abstract class AFCalc implements Cloneable {
|
||||||
*
|
*
|
||||||
* @param nSamples number of samples, must be > 0
|
* @param nSamples number of samples, must be > 0
|
||||||
* @param maxAltAlleles maxAltAlleles for SNPs
|
* @param maxAltAlleles maxAltAlleles for SNPs
|
||||||
* @param maxAltAllelesForIndels for indels
|
|
||||||
* @param ploidy the ploidy, must be > 0
|
* @param ploidy the ploidy, must be > 0
|
||||||
*/
|
*/
|
||||||
protected AFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
protected AFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||||
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
||||||
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
|
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
|
||||||
if ( maxAltAllelesForIndels < 1 ) throw new IllegalArgumentException("maxAltAllelesForIndels must be greater than zero " + maxAltAllelesForIndels);
|
|
||||||
if ( ploidy < 1 ) throw new IllegalArgumentException("ploidy must be > 0 but got " + ploidy);
|
if ( ploidy < 1 ) throw new IllegalArgumentException("ploidy must be > 0 but got " + ploidy);
|
||||||
|
|
||||||
this.nSamples = nSamples;
|
this.nSamples = nSamples;
|
||||||
this.maxAlternateAllelesToGenotype = maxAltAlleles;
|
this.maxAlternateAllelesToGenotype = maxAltAlleles;
|
||||||
this.maxAlternateAllelesForIndels = maxAltAllelesForIndels;
|
this.stateTracker = new StateTracker(maxAltAlleles);
|
||||||
this.stateTracker = new StateTracker(Math.max(maxAltAlleles, maxAltAllelesForIndels));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -191,7 +187,7 @@ public abstract class AFCalc implements Cloneable {
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
public int getMaxAltAlleles() {
|
public int getMaxAltAlleles() {
|
||||||
return Math.max(maxAlternateAllelesToGenotype, maxAlternateAllelesForIndels);
|
return maxAlternateAllelesToGenotype;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected StateTracker getStateTracker() {
|
protected StateTracker getStateTracker() {
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,7 @@ public class AFCalcFactory {
|
||||||
public static AFCalc createAFCalc(final UnifiedArgumentCollection UAC,
|
public static AFCalc createAFCalc(final UnifiedArgumentCollection UAC,
|
||||||
final int nSamples,
|
final int nSamples,
|
||||||
final Logger logger) {
|
final Logger logger) {
|
||||||
final int maxAltAlleles = Math.max(UAC.MAX_ALTERNATE_ALLELES, UAC.MAX_ALTERNATE_ALLELES_FOR_INDELS);
|
final int maxAltAlleles = UAC.MAX_ALTERNATE_ALLELES;
|
||||||
if ( ! UAC.AFmodel.usableForParams(UAC.samplePloidy, maxAltAlleles) ) {
|
if ( ! UAC.AFmodel.usableForParams(UAC.samplePloidy, maxAltAlleles) ) {
|
||||||
logger.info("Requested ploidy " + UAC.samplePloidy + " maxAltAlleles " + maxAltAlleles + " not supported by requested model " + UAC.AFmodel + " looking for an option");
|
logger.info("Requested ploidy " + UAC.samplePloidy + " maxAltAlleles " + maxAltAlleles + " not supported by requested model " + UAC.AFmodel + " looking for an option");
|
||||||
final List<Calculation> supportingCalculations = new LinkedList<Calculation>();
|
final List<Calculation> supportingCalculations = new LinkedList<Calculation>();
|
||||||
|
|
@ -109,7 +109,7 @@ public class AFCalcFactory {
|
||||||
logger.info("Selecting model " + UAC.AFmodel);
|
logger.info("Selecting model " + UAC.AFmodel);
|
||||||
}
|
}
|
||||||
|
|
||||||
final AFCalc calc = createAFCalc(UAC.AFmodel, nSamples, UAC.MAX_ALTERNATE_ALLELES, UAC.MAX_ALTERNATE_ALLELES_FOR_INDELS, UAC.samplePloidy);
|
final AFCalc calc = createAFCalc(UAC.AFmodel, nSamples, UAC.MAX_ALTERNATE_ALLELES, UAC.samplePloidy);
|
||||||
|
|
||||||
if ( logger != null ) calc.setLogger(logger);
|
if ( logger != null ) calc.setLogger(logger);
|
||||||
if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog);
|
if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog);
|
||||||
|
|
@ -126,7 +126,7 @@ public class AFCalcFactory {
|
||||||
* @return an initialized AFCalc
|
* @return an initialized AFCalc
|
||||||
*/
|
*/
|
||||||
public static AFCalc createAFCalc(final int nSamples) {
|
public static AFCalc createAFCalc(final int nSamples) {
|
||||||
return createAFCalc(chooseBestCalculation(nSamples, 2, 1), nSamples, 2, 2, 2);
|
return createAFCalc(chooseBestCalculation(nSamples, 2, 1), nSamples, 2, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -139,7 +139,7 @@ public class AFCalcFactory {
|
||||||
* @return an initialized AFCalc
|
* @return an initialized AFCalc
|
||||||
*/
|
*/
|
||||||
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles) {
|
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles) {
|
||||||
return createAFCalc(calc, nSamples, maxAltAlleles, maxAltAlleles, 2);
|
return createAFCalc(calc, nSamples, maxAltAlleles, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -147,14 +147,12 @@ public class AFCalcFactory {
|
||||||
*
|
*
|
||||||
* @param nSamples the number of samples we'll be using
|
* @param nSamples the number of samples we'll be using
|
||||||
* @param maxAltAlleles the max. alt alleles to consider for SNPs
|
* @param maxAltAlleles the max. alt alleles to consider for SNPs
|
||||||
* @param maxAltAllelesForIndels the max. alt alleles to consider for non-SNPs
|
|
||||||
* @param ploidy the sample ploidy. Must be consistent with the calc
|
* @param ploidy the sample ploidy. Must be consistent with the calc
|
||||||
*
|
*
|
||||||
* @return an initialized AFCalc
|
* @return an initialized AFCalc
|
||||||
*/
|
*/
|
||||||
public static AFCalc createAFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
public static AFCalc createAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||||
final int maxAlt = Math.max(maxAltAlleles, maxAltAllelesForIndels);
|
return createAFCalc(chooseBestCalculation(nSamples, ploidy, maxAltAlleles), nSamples, maxAltAlleles, ploidy);
|
||||||
return createAFCalc(chooseBestCalculation(nSamples, ploidy, maxAlt), nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -181,20 +179,17 @@ public class AFCalcFactory {
|
||||||
* @param calc the calculation to use
|
* @param calc the calculation to use
|
||||||
* @param nSamples the number of samples we'll be using
|
* @param nSamples the number of samples we'll be using
|
||||||
* @param maxAltAlleles the max. alt alleles to consider for SNPs
|
* @param maxAltAlleles the max. alt alleles to consider for SNPs
|
||||||
* @param maxAltAllelesForIndels the max. alt alleles to consider for non-SNPs
|
|
||||||
* @param ploidy the sample ploidy. Must be consistent with the calc
|
* @param ploidy the sample ploidy. Must be consistent with the calc
|
||||||
*
|
*
|
||||||
* @return an initialized AFCalc
|
* @return an initialized AFCalc
|
||||||
*/
|
*/
|
||||||
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
public static AFCalc createAFCalc(final Calculation calc, final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||||
if ( calc == null ) throw new IllegalArgumentException("Calculation cannot be null");
|
if ( calc == null ) throw new IllegalArgumentException("Calculation cannot be null");
|
||||||
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
if ( nSamples < 0 ) throw new IllegalArgumentException("nSamples must be greater than zero " + nSamples);
|
||||||
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
|
if ( maxAltAlleles < 1 ) throw new IllegalArgumentException("maxAltAlleles must be greater than zero " + maxAltAlleles);
|
||||||
if ( maxAltAllelesForIndels < 1 ) throw new IllegalArgumentException("maxAltAllelesForIndels must be greater than zero " + maxAltAllelesForIndels);
|
|
||||||
if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy);
|
if ( ploidy < 1 ) throw new IllegalArgumentException("sample ploidy must be greater than zero " + ploidy);
|
||||||
|
|
||||||
final int maxAlt = Math.max(maxAltAlleles, maxAltAllelesForIndels);
|
if ( ! calc.usableForParams(ploidy, maxAltAlleles) )
|
||||||
if ( ! calc.usableForParams(ploidy, maxAlt) )
|
|
||||||
throw new IllegalArgumentException("AFCalc " + calc + " does not support requested ploidy " + ploidy);
|
throw new IllegalArgumentException("AFCalc " + calc + " does not support requested ploidy " + ploidy);
|
||||||
|
|
||||||
final Class<? extends AFCalc> afClass = getClassByName(calc.className);
|
final Class<? extends AFCalc> afClass = getClassByName(calc.className);
|
||||||
|
|
@ -202,19 +197,19 @@ public class AFCalcFactory {
|
||||||
throw new IllegalArgumentException("Unexpected AFCalc " + calc);
|
throw new IllegalArgumentException("Unexpected AFCalc " + calc);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Object args[] = new Object[]{nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy};
|
Object args[] = new Object[]{nSamples, maxAltAlleles, ploidy};
|
||||||
Constructor c = afClass.getDeclaredConstructor(int.class, int.class, int.class, int.class);
|
Constructor c = afClass.getDeclaredConstructor(int.class, int.class, int.class);
|
||||||
return (AFCalc)c.newInstance(args);
|
return (AFCalc)c.newInstance(args);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ReviewedStingException("Could not instantiate AFCalc " + calc, e);
|
throw new ReviewedStingException("Could not instantiate AFCalc " + calc, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static List<AFCalc> createAFCalcs(final List<Calculation> calcs, final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
protected static List<AFCalc> createAFCalcs(final List<Calculation> calcs, final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||||
final List<AFCalc> AFCalcs = new LinkedList<AFCalc>();
|
final List<AFCalc> AFCalcs = new LinkedList<AFCalc>();
|
||||||
|
|
||||||
for ( final Calculation calc : calcs )
|
for ( final Calculation calc : calcs )
|
||||||
AFCalcs.add(createAFCalc(calc, nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy));
|
AFCalcs.add(createAFCalc(calc, nSamples, maxAltAlleles, ploidy));
|
||||||
|
|
||||||
return AFCalcs;
|
return AFCalcs;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,8 +31,8 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
||||||
public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int maxAltAllelesForIndels, final int ploidy) {
|
public DiploidExactAFCalc(final int nSamples, final int maxAltAlleles, final int ploidy) {
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
super(nSamples, maxAltAlleles, ploidy);
|
||||||
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
|
if ( ploidy != 2 ) throw new IllegalArgumentException("ploidy must be two for DiploidExactAFCalc and subclasses but saw " + ploidy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -75,16 +75,14 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected VariantContext reduceScope(final VariantContext vc) {
|
protected VariantContext reduceScope(final VariantContext vc) {
|
||||||
final int myMaxAltAllelesToGenotype = vc.getType().equals(VariantContext.Type.INDEL) ? maxAlternateAllelesForIndels : maxAlternateAllelesToGenotype;
|
|
||||||
|
|
||||||
// don't try to genotype too many alternate alleles
|
// don't try to genotype too many alternate alleles
|
||||||
if ( vc.getAlternateAlleles().size() > myMaxAltAllelesToGenotype ) {
|
if ( vc.getAlternateAlleles().size() > getMaxAltAlleles() ) {
|
||||||
logger.warn("this tool is currently set to genotype at most " + myMaxAltAllelesToGenotype + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
logger.warn("this tool is currently set to genotype at most " + getMaxAltAlleles() + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + (vc.getAlternateAlleles().size()) + " alternate alleles so only the top alleles will be used; see the --max_alternate_alleles argument");
|
||||||
|
|
||||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||||
List<Allele> alleles = new ArrayList<Allele>(myMaxAltAllelesToGenotype + 1);
|
List<Allele> alleles = new ArrayList<Allele>(getMaxAltAlleles() + 1);
|
||||||
alleles.add(vc.getReference());
|
alleles.add(vc.getReference());
|
||||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, myMaxAltAllelesToGenotype));
|
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
|
||||||
builder.alleles(alleles);
|
builder.alleles(alleles);
|
||||||
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||||
return builder.make();
|
return builder.make();
|
||||||
|
|
|
||||||
|
|
@ -39,8 +39,8 @@ import java.util.ArrayList;
|
||||||
abstract class ExactAFCalc extends AFCalc {
|
abstract class ExactAFCalc extends AFCalc {
|
||||||
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
|
||||||
|
|
||||||
protected ExactAFCalc(final int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
protected ExactAFCalc(final int nSamples, int maxAltAlleles, final int ploidy) {
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
super(nSamples, maxAltAlleles, ploidy);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -89,7 +89,7 @@ import java.util.*;
|
||||||
/**
|
/**
|
||||||
* The min. confidence of an allele to be included in the joint posterior.
|
* The min. confidence of an allele to be included in the joint posterior.
|
||||||
*/
|
*/
|
||||||
private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-20);
|
private final static double MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR = Math.log10(1e-10);
|
||||||
|
|
||||||
private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0};
|
private final static int[] BIALLELIC_NON_INFORMATIVE_PLS = new int[]{0,0,0};
|
||||||
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
private final static List<Allele> BIALLELIC_NOCALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||||
|
|
@ -111,9 +111,9 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
final AFCalc biAlleleExactModel;
|
final AFCalc biAlleleExactModel;
|
||||||
|
|
||||||
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
protected IndependentAllelesDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
super(nSamples, maxAltAlleles, ploidy);
|
||||||
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, 1, ploidy);
|
biAlleleExactModel = new ReferenceDiploidExactAFCalc(nSamples, 1, ploidy);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -336,12 +336,13 @@ import java.util.*;
|
||||||
// MLE of altI allele is simply the MLE of this allele in altAlleles
|
// MLE of altI allele is simply the MLE of this allele in altAlleles
|
||||||
alleleCountsOfMLE[altI] = sortedResultWithThetaNPriors.getAlleleCountAtMLE(altAllele);
|
alleleCountsOfMLE[altI] = sortedResultWithThetaNPriors.getAlleleCountAtMLE(altAllele);
|
||||||
|
|
||||||
log10PriorsOfAC[0] += sortedResultWithThetaNPriors.getLog10PriorOfAFEq0();
|
|
||||||
log10PriorsOfAC[1] += sortedResultWithThetaNPriors.getLog10PriorOfAFGT0();
|
|
||||||
|
|
||||||
// the AF > 0 case requires us to store the normalized likelihood for later summation
|
// the AF > 0 case requires us to store the normalized likelihood for later summation
|
||||||
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR )
|
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR ) {
|
||||||
log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0();
|
log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0();
|
||||||
|
log10PriorsOfAC[0] += sortedResultWithThetaNPriors.getLog10PriorOfAFEq0();
|
||||||
|
log10PriorsOfAC[1] += sortedResultWithThetaNPriors.getLog10PriorOfAFGT0();
|
||||||
|
}
|
||||||
|
|
||||||
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
|
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
|
||||||
|
|
||||||
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,8 @@ import java.util.Map;
|
||||||
* Original bi-allelic ~O(N) implementation. Kept here for posterity and reference
|
* Original bi-allelic ~O(N) implementation. Kept here for posterity and reference
|
||||||
*/
|
*/
|
||||||
class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
|
class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
protected OriginalDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
super(nSamples, maxAltAlleles, ploidy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
|
public class ReferenceDiploidExactAFCalc extends DiploidExactAFCalc {
|
||||||
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, int maxAltAllelesForIndels, final int ploidy) {
|
protected ReferenceDiploidExactAFCalc(int nSamples, int maxAltAlleles, final int ploidy) {
|
||||||
super(nSamples, maxAltAlleles, maxAltAllelesForIndels, ploidy);
|
super(nSamples, maxAltAlleles, ploidy);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ public class GLBasedSampleSelector extends SampleSelector {
|
||||||
// do we want to apply a prior? maybe user-spec?
|
// do we want to apply a prior? maybe user-spec?
|
||||||
if ( flatPriors == null ) {
|
if ( flatPriors == null ) {
|
||||||
flatPriors = new double[1+2*samples.size()];
|
flatPriors = new double[1+2*samples.size()];
|
||||||
AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 4, 2);
|
AFCalculator = AFCalcFactory.createAFCalc(samples.size(), 4, 2);
|
||||||
}
|
}
|
||||||
final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors);
|
final AFCalcResult result = AFCalculator.getLog10PNonRef(subContext, flatPriors);
|
||||||
// do we want to let this qual go up or down?
|
// do we want to let this qual go up or down?
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultipleSNPAlleles() {
|
public void testMultipleSNPAlleles() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||||
Arrays.asList("73c9b926c5e971a113de347a64fdcf20"));
|
Arrays.asList("543f68e42034bf44cfb24da8c9204320"));
|
||||||
executeTest("test Multiple SNP alleles", spec);
|
executeTest("test Multiple SNP alleles", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testReverseTrim() {
|
public void testReverseTrim() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||||
Arrays.asList("7f8d13690cb7d4173787afa00c496f12"));
|
Arrays.asList("5ce03dd9ca2d9324c1d4a9d64389beb5"));
|
||||||
executeTest("test reverse trim", spec);
|
executeTest("test reverse trim", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue