Merge pull request #622 from broadinstitute/ldg_SORanalysis
Add StrandOddsRatio to default annotations produced by GenotypeGVCFs
This commit is contained in:
commit
c56e493f98
|
|
@ -64,13 +64,16 @@ import java.util.*;
|
|||
* <p> Odds Ratios in the 2x2 contingency table below are R = (X[0][0] * X[1][1]) / (X[0][1] * X[1][0]) and its inverse
|
||||
* + strand - strand
|
||||
* Ref X[0][0] X[0][1]
|
||||
* Alt X[1][0] X[1][0]
|
||||
* Alt X[1][0] X[1][1]
|
||||
* The sum R + 1/R is used to detect a difference in strand bias for ref and for alt (the sum makes it symmetric):
|
||||
* A high value is indicative of large difference where one entry is very small compared to the others.
|
||||
*
|
||||
* A scale factor of refRatio/altRatio where refRatio = (max(X[0][0], X[0][1])/min(X[0][0], X[0][1])) and
|
||||
* altRatio = (max(X[1][0], X[1][1])/min(X[1][0], X[1][1])) ensures that the annotation value is large only
|
||||
* </p>
|
||||
*/
|
||||
public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBasedAnnotation {
|
||||
private final static double AUGMENTATION_CONSTANT = 0.1;
|
||||
private final static double AUGMENTATION_CONSTANT = 1.0;
|
||||
private static final int MIN_COUNT = 0;
|
||||
|
||||
private static final String SOR = "SOR";
|
||||
|
|
@ -88,20 +91,20 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased
|
|||
if ( vc.hasGenotypes() ) {
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
|
||||
if ( tableFromPerSampleAnnotations != null ) {
|
||||
final double ratio = symmetricOddsRatio(tableFromPerSampleAnnotations);
|
||||
final double ratio = calculateSOR(tableFromPerSampleAnnotations);
|
||||
return annotationForOneTable(ratio);
|
||||
}
|
||||
}
|
||||
|
||||
if (vc.isSNP() && stratifiedContexts != null) {
|
||||
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
|
||||
final double ratio = symmetricOddsRatio(tableNoFiltering);
|
||||
final double ratio = calculateSOR(tableNoFiltering);
|
||||
return annotationForOneTable(ratio);
|
||||
}
|
||||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
||||
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
|
||||
final double ratio = symmetricOddsRatio(table);
|
||||
final double ratio = calculateSOR(table);
|
||||
return annotationForOneTable(ratio);
|
||||
}
|
||||
else
|
||||
|
|
@ -111,13 +114,16 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased
|
|||
}
|
||||
|
||||
/**
|
||||
* Computes the symmetric odds ratio of a table after augmentation.
|
||||
* Computes the SOR value of a table after augmentation. Based on the symmetric odds ratio but modified to take on
|
||||
* low values when the reference +/- read count ratio is skewed but the alt count ratio is not. Natural log is taken
|
||||
* to keep values within roughly the same range as other annotations.
|
||||
*
|
||||
* Augmentation avoids quotient by zero.
|
||||
*
|
||||
* @param originalTable The table before augmentation
|
||||
* @return the symmetric odds ratio
|
||||
* @return the SOR annotation value
|
||||
*/
|
||||
final protected double symmetricOddsRatio(final int[][] originalTable) {
|
||||
final protected double calculateSOR(final int[][] originalTable) {
|
||||
final double[][] augmentedTable = augmentContingencyTable(originalTable);
|
||||
|
||||
double ratio = 0;
|
||||
|
|
@ -125,7 +131,12 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased
|
|||
ratio += (augmentedTable[0][0] / augmentedTable[0][1]) * (augmentedTable[1][1] / augmentedTable[1][0]);
|
||||
ratio += (augmentedTable[0][1] / augmentedTable[0][0]) * (augmentedTable[1][0] / augmentedTable[1][1]);
|
||||
|
||||
return ratio;
|
||||
final double refRatio = (Math.min(augmentedTable[0][0], augmentedTable[0][1])/Math.max(augmentedTable[0][0], augmentedTable[0][1]));
|
||||
final double altRatio = (Math.min(augmentedTable[1][0], augmentedTable[1][1])/Math.max(augmentedTable[1][0], augmentedTable[1][1]));
|
||||
|
||||
ratio = ratio*refRatio/altRatio;
|
||||
|
||||
return Math.log(ratio);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -340,12 +340,15 @@ public class VariantDataManager {
|
|||
private static double decodeAnnotation( final String annotationKey, final VariantContext vc, final boolean jitter ) {
|
||||
double value;
|
||||
|
||||
final double LOG_OF_TWO = 0.6931472;
|
||||
|
||||
try {
|
||||
value = vc.getAttributeAsDouble( annotationKey, Double.NaN );
|
||||
if( Double.isInfinite(value) ) { value = Double.NaN; }
|
||||
if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.01) == 0 ) { value += 0.01 * GenomeAnalysisEngine.getRandomGenerator().nextGaussian(); }
|
||||
if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.01) == 0 ) { value += 0.01 * GenomeAnalysisEngine.getRandomGenerator().nextGaussian(); }
|
||||
if( jitter && annotationKey.equalsIgnoreCase("InbreedingCoeff") && MathUtils.compareDoubles(value, 0.0, 0.01) == 0 ) { value += 0.01 * GenomeAnalysisEngine.getRandomGenerator().nextGaussian(); }
|
||||
if( jitter && annotationKey.equalsIgnoreCase("SOR") && MathUtils.compareDoubles(value, LOG_OF_TWO, 0.01) == 0 ) { value += 0.01 * GenomeAnalysisEngine.getRandomGenerator().nextGaussian(); } //min SOR is 2.0, then we take ln
|
||||
} catch( Exception e ) {
|
||||
value = Double.NaN; // The VQSR works with missing data by marginalizing over the missing dimension when evaluating the Gaussian mixture model
|
||||
}
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to recompute. The single value 'none' removes the default annotations", required=false)
|
||||
protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"InbreedingCoeff", "FisherStrand", "QualByDepth", "ChromosomeCounts", "GenotypeSummaries"}));
|
||||
protected List<String> annotationsToUse = new ArrayList<>(Arrays.asList(new String[]{"InbreedingCoeff", "FisherStrand", "QualByDepth", "ChromosomeCounts", "GenotypeSummaries", "StrandOddsRatio"}));
|
||||
|
||||
/**
|
||||
* The rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate. Note that dbSNP is not used in any way for the calculations themselves.
|
||||
|
|
|
|||
|
|
@ -62,27 +62,28 @@ public class StrandOddsRatioUnitTest {
|
|||
|
||||
@DataProvider(name = "UsingSOR")
|
||||
public Object[][] makeUsingSORData() {
|
||||
final double LOG_OF_TWO = 0.6931472;
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
tests.add(new Object[]{0, 0, 0, 0, 2.0});
|
||||
tests.add(new Object[]{100000, 100000, 100000, 100000, 2.0} );
|
||||
tests.add(new Object[]{Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, 2.0} );
|
||||
tests.add(new Object[]{0, 0, 0, 0, LOG_OF_TWO});
|
||||
tests.add(new Object[]{100000, 100000, 100000, 100000, LOG_OF_TWO} );
|
||||
tests.add(new Object[]{Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, LOG_OF_TWO} );
|
||||
|
||||
tests.add(new Object[]{0, 0, 100000, 100000, 2.0});
|
||||
tests.add(new Object[]{0, 0, Integer.MAX_VALUE, Integer.MAX_VALUE, 2.0});
|
||||
tests.add(new Object[]{0, 0, 100000, 100000, LOG_OF_TWO});
|
||||
tests.add(new Object[]{0, 0, Integer.MAX_VALUE, Integer.MAX_VALUE, LOG_OF_TWO});
|
||||
|
||||
tests.add(new Object[]{100000,100000,100000,0, 1000001.000001});
|
||||
tests.add(new Object[]{100,100,100,0, 1001.000999});
|
||||
tests.add(new Object[]{Integer.MAX_VALUE,Integer.MAX_VALUE,Integer.MAX_VALUE,0, 21474836471.0});
|
||||
tests.add(new Object[]{100000,100000,100000,0, 23.02587});
|
||||
tests.add(new Object[]{100,100,100,0, 9.230339});
|
||||
tests.add(new Object[]{Integer.MAX_VALUE,Integer.MAX_VALUE,Integer.MAX_VALUE,0, 42.97513});
|
||||
|
||||
tests.add(new Object[]{13736,9047,41,1433, 52.95947});
|
||||
tests.add(new Object[]{66, 14, 64, 4, 3.63482});
|
||||
tests.add(new Object[]{351169, 306836, 153739, 2379, 56.48043});
|
||||
tests.add(new Object[]{116449, 131216, 289, 16957, 52.07302});
|
||||
tests.add(new Object[]{137, 159, 9, 23, 2.64460});
|
||||
tests.add(new Object[]{129, 90, 21, 20, 2.09757});
|
||||
tests.add(new Object[]{14054, 9160, 16, 7827, 745.89657});
|
||||
tests.add(new Object[]{32803, 9184, 32117, 3283, 3.10399});
|
||||
tests.add(new Object[]{2068, 6796, 1133, 0, 37235.43791});
|
||||
tests.add(new Object[]{13736,9047,41,1433, 7.061479});
|
||||
tests.add(new Object[]{66, 14, 64, 4, 2.248203});
|
||||
tests.add(new Object[]{351169, 306836, 153739, 2379, 8.066731});
|
||||
tests.add(new Object[]{116449, 131216, 289, 16957, 7.898818});
|
||||
tests.add(new Object[]{137, 159, 9, 23, 1.664854});
|
||||
tests.add(new Object[]{129, 90, 21, 20, 0.4303384});
|
||||
tests.add(new Object[]{14054, 9160, 16, 7827, 12.2645});
|
||||
tests.add(new Object[]{32803, 9184, 32117, 3283, 2.139932});
|
||||
tests.add(new Object[]{2068, 6796, 1133, 0, 14.06701});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
|
@ -94,7 +95,7 @@ public class StrandOddsRatioUnitTest {
|
|||
contingencyTable[0][1] = refneg;
|
||||
contingencyTable[1][0] = altpos;
|
||||
contingencyTable[1][1] = altneg;
|
||||
final double ratio = new StrandOddsRatio().symmetricOddsRatio(contingencyTable);
|
||||
final double ratio = new StrandOddsRatio().calculateSOR(contingencyTable);
|
||||
Assert.assertEquals(ratio, expectedOddsRatio, DELTA_PRECISION, "Pass");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,7 +69,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("e61d4cbfd21f102c2e6c0bd56cb68312"));
|
||||
Arrays.asList("5487ad609548c30e79a431115dc772ba"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("4200fce6f253ef51dc4cd903794270b9"));
|
||||
Arrays.asList("f1dd2b9ab422c0b806392464e466ed91"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
||||
}
|
||||
|
||||
|
|
@ -94,7 +94,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("1c553decc3f9fe1d752137a7f34d7e4e"));
|
||||
Arrays.asList("f7650a8a861dec3138848bb972929002"));
|
||||
executeTest("combineSingleSamplePipelineGVCFHierarchical", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
||||
1,
|
||||
Arrays.asList("e6952f19e692091332809c59069a1eab"));
|
||||
Arrays.asList("df5a6a574c48c243fad5b44f34343fe3"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
||||
}
|
||||
|
||||
|
|
@ -116,7 +116,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "gvcfExample1.vcf",
|
||||
1,
|
||||
Arrays.asList("b18ddcf828227c07f1b38d99c91f6e09"));
|
||||
Arrays.asList("b4bb1d21c7a3d793a98b0857c7c5d52b"));
|
||||
executeTest("testJustOneSample", spec);
|
||||
}
|
||||
|
||||
|
|
@ -127,7 +127,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V " + privateTestDir + "gvcfExample1.vcf" +
|
||||
" -V " + privateTestDir + "gvcfExample2.vcf",
|
||||
1,
|
||||
Arrays.asList("a453f2e15b20f8cf0edadb998cb883d5"));
|
||||
Arrays.asList("81c4cc8a6b72c24598ee899df838f1e8"));
|
||||
executeTest("testSamplesWithDifferentLs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -138,12 +138,12 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
|
||||
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf",
|
||||
1,
|
||||
Arrays.asList("1dda31e020086a4c5643571fbccd40ba"));
|
||||
Arrays.asList("9626a7108d616d63a2a8069b306c1fe0"));
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
|
||||
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.nocall.vcf",
|
||||
1,
|
||||
Arrays.asList("1dda31e020086a4c5643571fbccd40ba"));
|
||||
Arrays.asList("9626a7108d616d63a2a8069b306c1fe0"));
|
||||
executeTest("testNoPLsException.1", spec1);
|
||||
executeTest("testNoPLsException.2", spec2);
|
||||
}
|
||||
|
|
@ -153,7 +153,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-nda"),
|
||||
1,
|
||||
Arrays.asList("715ef2c2f954b91aad8606188ed1b6c5"));
|
||||
Arrays.asList("27cddeb2287827a58d79fc7d3ddad080"));
|
||||
executeTest("testNDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -162,7 +162,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-maxAltAlleles 1"),
|
||||
1,
|
||||
Arrays.asList("8973ec01a29e53cd4181f9d8662d9284"));
|
||||
Arrays.asList("8fa78191298b4d8c9b40fba2c705ad56"));
|
||||
executeTest("testMaxAltAlleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -171,7 +171,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-stand_call_conf 300 -stand_emit_conf 100"),
|
||||
1,
|
||||
Arrays.asList("502ed8ce40c461158520076dd02cc29c"));
|
||||
Arrays.asList("c841b408e41596529009bf7f07de9b3f"));
|
||||
executeTest("testStandardConf", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue