FisherStrand now computed with and without filtering low-qual bases, and least significant pvalue is kept
-- Old way (filtering for Q > 17 bases) resulted in biased FS when the site was good but there was a systematic shift in the QUAL of REF and ALT between strands of the reads (sometimes happens) -- New way (taking all bases) was consistent with BaseQualRankSum and other tests, but there can be a lot of low qual reference bases on one strand in some techs (ION/PROTON/PACBIO) because of the preference for introducing an indel vs. a mismatch. -- This implementation allows us to have our cake and eat it to by computing both p-values, and taking the maximum one (i.e., least significant). -- No integration tests updated yet -- still exploring the consequences of this change
This commit is contained in:
parent
bedcdbdc5f
commit
2996693c9f
|
|
@ -55,6 +55,8 @@ import java.util.*;
|
||||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||||
private static final String FS = "FS";
|
private static final String FS = "FS";
|
||||||
private static final double MIN_PVALUE = 1E-320;
|
private static final double MIN_PVALUE = 1E-320;
|
||||||
|
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;
|
||||||
|
|
||||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||||
final AnnotatorCompatible walker,
|
final AnnotatorCompatible walker,
|
||||||
final ReferenceContext ref,
|
final ReferenceContext ref,
|
||||||
|
|
@ -64,30 +66,53 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
if ( !vc.isVariant() )
|
if ( !vc.isVariant() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
int[][] table;
|
|
||||||
|
|
||||||
if (vc.isSNP() && stratifiedContexts != null) {
|
if (vc.isSNP() && stratifiedContexts != null) {
|
||||||
table = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1);
|
||||||
|
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST);
|
||||||
|
return pValueForBestTable(tableFiltering, tableNoFiltering);
|
||||||
}
|
}
|
||||||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||||
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
||||||
table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount());
|
||||||
|
return pValueForBestTable(table, null);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
// for non-snp variants, we need per-read likelihoods.
|
// for non-snp variants, we need per-read likelihoods.
|
||||||
// for snps, we can get same result from simple pileup
|
// for snps, we can get same result from simple pileup
|
||||||
return null;
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
if (table == null)
|
/**
|
||||||
return null;
|
* Create an annotation for the highest (i.e., least significant) p-value of table1 and table2
|
||||||
|
*
|
||||||
|
* @param table1 a contingency table, may be null
|
||||||
|
* @param table2 a contingency table, may be null
|
||||||
|
* @return annotation result for FS given tables
|
||||||
|
*/
|
||||||
|
private Map<String, Object> pValueForBestTable(final int[][] table1, final int[][] table2) {
|
||||||
|
if ( table2 == null )
|
||||||
|
return table1 == null ? null : annotationForOneTable(pValueForContingencyTable(table1));
|
||||||
|
else if (table1 == null)
|
||||||
|
return annotationForOneTable(pValueForContingencyTable(table2));
|
||||||
|
else { // take the one with the best (i.e., least significant pvalue)
|
||||||
|
double pvalue1 = Math.max(pValueForContingencyTable(table1), MIN_PVALUE);
|
||||||
|
double pvalue2 = Math.max(pValueForContingencyTable(table2), MIN_PVALUE);
|
||||||
|
return annotationForOneTable(Math.max(pvalue1, pvalue2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE);
|
/**
|
||||||
if ( pvalue == null )
|
* Returns an annotation result given a pValue
|
||||||
return null;
|
*
|
||||||
|
* @param pValue
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
* @return a hash map from FS -> phred-scaled pValue
|
||||||
map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pvalue)));
|
*/
|
||||||
return map;
|
private Map<String, Object> annotationForOneTable(final double pValue) {
|
||||||
|
final Object value = String.format("%.3f", QualityUtils.phredScaleErrorRate(pValue));
|
||||||
|
return Collections.singletonMap(FS, value);
|
||||||
|
// Map<String, Object> map = new HashMap<String, Object>();
|
||||||
|
// map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pValue)));
|
||||||
|
// return map;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
|
|
@ -244,7 +269,10 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
* allele2 # #
|
* allele2 # #
|
||||||
* @return a 2x2 contingency table
|
* @return a 2x2 contingency table
|
||||||
*/
|
*/
|
||||||
private static int[][] getSNPContingencyTable(Map<String, AlignmentContext> stratifiedContexts, Allele ref, Allele alt) {
|
private static int[][] getSNPContingencyTable(final Map<String, AlignmentContext> stratifiedContexts,
|
||||||
|
final Allele ref,
|
||||||
|
final Allele alt,
|
||||||
|
final int minQScoreToConsider ) {
|
||||||
int[][] table = new int[2][2];
|
int[][] table = new int[2][2];
|
||||||
|
|
||||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
|
||||||
|
|
@ -252,8 +280,11 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
||||||
if ( ! RankSumTest.isUsableBase(p, false) || p.getRead().isReducedRead() ) // ignore deletions and reduced reads
|
if ( ! RankSumTest.isUsableBase(p, false) || p.getRead().isReducedRead() ) // ignore deletions and reduced reads
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
Allele base = Allele.create(p.getBase(), false);
|
if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
|
||||||
boolean isFW = !p.getRead().getReadNegativeStrandFlag();
|
continue;
|
||||||
|
|
||||||
|
final Allele base = Allele.create(p.getBase(), false);
|
||||||
|
final boolean isFW = !p.getRead().getReadNegativeStrandFlag();
|
||||||
|
|
||||||
final boolean matchesRef = ref.equals(base, true);
|
final boolean matchesRef = ref.equals(base, true);
|
||||||
final boolean matchesAlt = alt.equals(base, true);
|
final boolean matchesAlt = alt.equals(base, true);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue