diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java index 40379b585..f257a5960 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java @@ -104,15 +104,15 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation, } if (vc.isSNP() && stratifiedContexts != null) { - final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1); - final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST); + final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT); + final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT); printTable("unfiltered", tableNoFiltering); printTable("filtered", tableFiltering); return pValueForBestTable(tableFiltering, tableNoFiltering); } else if (stratifiedPerReadAlleleLikelihoodMap != null) { // either SNP with no alignment context, or indels: per-read likelihood map needed - final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc); + final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT); //logger.info("VC " + vc); //printTable(table, 0.0); return pValueForBestTable(table, null); @@ -325,114 +325,6 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation, return sum; } - /** - Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: - * fw rc - * allele1 # # - * allele2 # # - * @return a 2x2 contingency table - */ - public static int[][] getContingencyTable( final Map stratifiedPerReadAlleleLikelihoodMap, final VariantContext vc) { - if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); } - if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); } - final Allele ref = vc.getReference(); - final Allele alt = vc.getAltAlleleWithHighestAlleleCount(); - final int[][] table = new int[2][2]; - for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) { - final int[] myTable = new int[4]; - for (final Map.Entry> el : maps.getLikelihoodReadMap().entrySet()) { - final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); - final GATKSAMRecord read = el.getKey(); - updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt); - } - if ( passesMinimumThreshold(myTable, MIN_COUNT) ) - copyToMainTable(myTable, table); - } - - return table; - } - - /** - * Helper method to copy the per-sample table to the main table - * - * @param perSampleTable per-sample table (single dimension) - * @param mainTable main table (two dimensions) - */ - private static void copyToMainTable(final int[] perSampleTable, final int[][] mainTable) { - mainTable[0][0] += perSampleTable[0]; - mainTable[0][1] += perSampleTable[1]; - mainTable[1][0] += perSampleTable[2]; - mainTable[1][1] += perSampleTable[3]; - } - - /** - Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: - * fw rc - * allele1 # # - * allele2 # # - * @return a 2x2 contingency table - */ - private static int[][] getSNPContingencyTable(final Map stratifiedContexts, - final Allele ref, - final Allele alt, - final int minQScoreToConsider ) { - int[][] table = new int[2][2]; - - for ( Map.Entry sample : stratifiedContexts.entrySet() ) { - final int[] myTable = new int[4]; - for (PileupElement p : sample.getValue().getBasePileup()) { - - if ( ! isUsableBase(p) ) // ignore deletions and bad MQ - continue; - - if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider ) - continue; - - updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt); - } - - if ( passesMinimumThreshold( myTable, MIN_COUNT ) ) - copyToMainTable(myTable, table); - } - - return table; - } - - /** - * Can the base in this pileup element be used in comparative tests? - * - * @param p the pileup element to consider - * - * @return true if this base is part of a meaningful read for comparison, false otherwise - */ - private static boolean isUsableBase(final PileupElement p) { - return !( p.isDeletion() || - p.getMappingQual() == 0 || - p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || - ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); - } - - private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt) { - - final boolean matchesRef = allele.equals(ref, true); - final boolean matchesAlt = allele.equals(alt, true); - - if ( matchesRef || matchesAlt ) { - final int offset = matchesRef ? 0 : 2; - - if ( read.isStrandless() ) { - // a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1 - // (the 1 is to ensure that a strandless read always counts as an observation on both strands, even - // if the read is only seen once, because it's a merged read or other) - table[offset]++; - table[offset + 1]++; - } else { - // a normal read with an actual strand - final boolean isFW = !read.getReadNegativeStrandFlag(); - table[offset + (isFW ? 0 : 1)]++; - } - } - } } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasBySample.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasBySample.java index 173580699..b5e1a8ec5 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasBySample.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasBySample.java @@ -82,7 +82,7 @@ public class StrandBiasBySample extends GenotypeAnnotation { if ( ! isAppropriateInput(alleleLikelihoodMap, g) ) return; - final int[][] table = FisherStrand.getContingencyTable(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc); + final int[][] table = FisherStrand.getContingencyTable(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc, 0); gb.attribute(STRAND_BIAS_BY_SAMPLE_KEY_NAME, FisherStrand.getContingencyArray(table)); } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java index f47df6d03..435de0f04 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandBiasTest.java @@ -46,9 +46,17 @@ package org.broadinstitute.gatk.tools.walkers.annotator; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.GenotypesContext; +import org.broadinstitute.gatk.utils.QualityUtils; +import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele; +import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.gatk.utils.pileup.PileupElement; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import java.util.*; @@ -85,6 +93,122 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation { return ( foundData ? decodeSBBS(sbArray) : null ); } + + /** + Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: + * fw rc + * allele1 # # + * allele2 # # + * @return a 2x2 contingency table + */ + protected static int[][] getSNPContingencyTable(final Map stratifiedContexts, + final Allele ref, + final Allele alt, + final int minQScoreToConsider, + final int minCount ) { + int[][] table = new int[2][2]; + + for ( Map.Entry sample : stratifiedContexts.entrySet() ) { + final int[] myTable = new int[4]; + for (PileupElement p : sample.getValue().getBasePileup()) { + + if ( ! isUsableBase(p) ) // ignore deletions and bad MQ + continue; + + if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider ) + continue; + + updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt); + } + + if ( passesMinimumThreshold( myTable, minCount ) ) + copyToMainTable(myTable, table); + } + + return table; + } + + /** + Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: + * fw rc + * allele1 # # + * allele2 # # + * @return a 2x2 contingency table + */ + public static int[][] getContingencyTable( final Map stratifiedPerReadAlleleLikelihoodMap, + final VariantContext vc, + final int minCount) { + if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); } + if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); } + + final Allele ref = vc.getReference(); + final Allele alt = vc.getAltAlleleWithHighestAlleleCount(); + final int[][] table = new int[2][2]; + + for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) { + final int[] myTable = new int[4]; + for (final Map.Entry> el : maps.getLikelihoodReadMap().entrySet()) { + final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); + final GATKSAMRecord read = el.getKey(); + updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt); + } + if ( passesMinimumThreshold(myTable, minCount) ) + copyToMainTable(myTable, table); + } + + return table; + } + + /** + * Helper method to copy the per-sample table to the main table + * + * @param perSampleTable per-sample table (single dimension) + * @param mainTable main table (two dimensions) + */ + private static void copyToMainTable(final int[] perSampleTable, final int[][] mainTable) { + mainTable[0][0] += perSampleTable[0]; + mainTable[0][1] += perSampleTable[1]; + mainTable[1][0] += perSampleTable[2]; + mainTable[1][1] += perSampleTable[3]; + } + + + /** + * Can the base in this pileup element be used in comparative tests? + * + * @param p the pileup element to consider + * + * @return true if this base is part of a meaningful read for comparison, false otherwise + */ + private static boolean isUsableBase(final PileupElement p) { + return !( p.isDeletion() || + p.getMappingQual() == 0 || + p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE || + ((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE); + } + + private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt) { + + final boolean matchesRef = allele.equals(ref, true); + final boolean matchesAlt = allele.equals(alt, true); + + if ( matchesRef || matchesAlt ) { + final int offset = matchesRef ? 0 : 2; + + if ( read.isStrandless() ) { + // a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1 + // (the 1 is to ensure that a strandless read always counts as an observation on both strands, even + // if the read is only seen once, because it's a merged read or other) + table[offset]++; + table[offset + 1]++; + } else { + // a normal read with an actual strand + final boolean isFW = !read.getReadNegativeStrandFlag(); + table[offset + (isFW ? 0 : 1)]++; + } + } + } + /** * Does this strand data array pass the minimum threshold for inclusion? * diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java index e204d9007..58117a168 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/StrandOddsRatio.java @@ -92,7 +92,22 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased return annotationForOneTable(ratio); } } - return null; + + if (vc.isSNP() && stratifiedContexts != null) { + final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT); + final double ratio = symmetricOddsRatio(tableNoFiltering); + return annotationForOneTable(ratio); + } + else if (stratifiedPerReadAlleleLikelihoodMap != null) { + // either SNP with no alignment context, or indels: per-read likelihood map needed + final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT); + final double ratio = symmetricOddsRatio(table); + return annotationForOneTable(ratio); + } + else + // for non-snp variants, we need per-read likelihoods. + // for snps, we can get same result from simple pileup + return null; } /**