Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
b0c2f223ab
|
|
@ -63,27 +63,26 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
|
|||
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
||||
private double calculateTDT( final VariantContext vc, final Set<Sample> triosToTest ) {
|
||||
|
||||
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM);
|
||||
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM);
|
||||
final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM) + calculateNChildren(vc, triosToTest, HET, HOM, HET);
|
||||
final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM) + calculateNChildren(vc, triosToTest, HOM, HOM, HET);
|
||||
final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET);
|
||||
final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET);
|
||||
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET);
|
||||
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET);
|
||||
final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET) + calculateNChildren(vc, triosToTest, REF, HET, REF);
|
||||
final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET) + calculateNChildren(vc, triosToTest, HET, HET, REF);
|
||||
|
||||
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
|
||||
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
|
||||
return (numer * numer) / denom;
|
||||
}
|
||||
|
||||
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int momIdx, final int dadIdx ) {
|
||||
final double likelihoodVector[] = new double[triosToTest.size() * 2];
|
||||
private double calculateNChildren( final VariantContext vc, final Set<Sample> triosToTest, final int childIdx, final int parent1Idx, final int parent2Idx ) {
|
||||
final double likelihoodVector[] = new double[triosToTest.size()];
|
||||
int iii = 0;
|
||||
for( final Sample child : triosToTest ) {
|
||||
final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector();
|
||||
final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector();
|
||||
final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector();
|
||||
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
|
||||
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
|
||||
likelihoodVector[iii++] = momGL[parent1Idx] + dadGL[parent2Idx] + childGL[childIdx];
|
||||
}
|
||||
|
||||
return MathUtils.sumLog10(likelihoodVector);
|
||||
|
|
|
|||
|
|
@ -34,10 +34,17 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
*/
|
||||
public class AlleleFrequencyCalculationResult {
|
||||
|
||||
// note that the cell at position zero in the likelihoods/posteriors array is actually probability of non-ref (since it's marginalized over all alleles)
|
||||
// IMPORTANT NOTE:
|
||||
// These 2 arrays are intended to contain the likelihoods/posterior probabilities for each alternate allele over each possible frequency (from 0 to 2N).
|
||||
// For any given alternate allele and frequency, the likelihoods are marginalized over values for all other alternate alleles. What this means is that
|
||||
// the likelihoods at cell index zero (AF=0) in the array is actually that of the site's being polymorphic (because although this alternate allele may
|
||||
// be at AF=0, it is marginalized over all other alternate alleles which are not necessarily at AF=0).
|
||||
// In the bi-allelic case (where there are no other alternate alleles over which to marginalize),
|
||||
// the value at cell index zero will be equal to AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED.
|
||||
final double[][] log10AlleleFrequencyLikelihoods;
|
||||
final double[][] log10AlleleFrequencyPosteriors;
|
||||
|
||||
// These 2 variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
|
||||
double log10LikelihoodOfAFzero = 0.0;
|
||||
double log10PosteriorOfAFzero = 0.0;
|
||||
|
||||
|
|
|
|||
|
|
@ -493,12 +493,12 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
if (DISCORDANCE_ONLY) {
|
||||
Collection<VariantContext> compVCs = tracker.getValues(discordanceTrack, context.getLocation());
|
||||
if (!isDiscordant(vc, compVCs))
|
||||
return 0;
|
||||
continue;
|
||||
}
|
||||
if (CONCORDANCE_ONLY) {
|
||||
Collection<VariantContext> compVCs = tracker.getValues(concordanceTrack, context.getLocation());
|
||||
if (!isConcordant(vc, compVCs))
|
||||
return 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (alleleRestriction.equals(NumberAlleleRestriction.BIALLELIC) && !vc.isBiallelic())
|
||||
|
|
@ -512,16 +512,20 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
VariantContext sub = subsetRecord(vc, samples);
|
||||
if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
|
||||
boolean failedJexlMatch = false;
|
||||
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
|
||||
if ( !VariantContextUtils.match(sub, jexl) ) {
|
||||
return 0;
|
||||
failedJexlMatch = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (SELECT_RANDOM_NUMBER) {
|
||||
randomlyAddVariant(++variantNumber, sub);
|
||||
}
|
||||
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||
vcfWriter.add(sub);
|
||||
if ( !failedJexlMatch ) {
|
||||
if (SELECT_RANDOM_NUMBER) {
|
||||
randomlyAddVariant(++variantNumber, sub);
|
||||
}
|
||||
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||
vcfWriter.add(sub);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testTDTAnnotation() {
|
||||
final String MD5 = "9fe37b61aab695ad47ce3c587148e91f";
|
||||
final String MD5 = "204e67536a17af7eaa6bf0a910818997";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,
|
||||
|
|
|
|||
|
|
@ -116,6 +116,19 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleRecordsAtOnePosition() {
|
||||
String testFile = validationDataLocation + "selectVariants.onePosition.vcf";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -select 'KG_FREQ < 0.5' --variant " + testFile + " -o %s -NO_HEADER",
|
||||
1,
|
||||
Arrays.asList("20b52c96f5c48258494d072752b53693")
|
||||
);
|
||||
|
||||
executeTest("testMultipleRecordsAtOnePositionFirstIsFiltered--" + testFile, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParallelization() {
|
||||
String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf";
|
||||
|
|
|
|||
Loading…
Reference in New Issue