Allow SOR to be calculated from HC

Refactor StrandBiasTest classes
This commit is contained in:
Laura Gauthier 2014-08-01 13:55:06 -04:00
parent e69b0d6316
commit 5533199402
4 changed files with 144 additions and 113 deletions

View File

@ -104,15 +104,15 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
}
if (vc.isSNP() && stratifiedContexts != null) {
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1);
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST);
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
final int[][] tableFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
printTable("unfiltered", tableNoFiltering);
printTable("filtered", tableFiltering);
return pValueForBestTable(tableFiltering, tableNoFiltering);
}
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
// either SNP with no alignment context, or indels: per-read likelihood map needed
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc);
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
//logger.info("VC " + vc);
//printTable(table, 0.0);
return pValueForBestTable(table, null);
@ -325,114 +325,6 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
return sum;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
* allele1 # #
* allele2 # #
* @return a 2x2 contingency table
*/
public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap, final VariantContext vc) {
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
final Allele ref = vc.getReference();
final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
final int[][] table = new int[2][2];
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
final int[] myTable = new int[4];
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
final GATKSAMRecord read = el.getKey();
updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt);
}
if ( passesMinimumThreshold(myTable, MIN_COUNT) )
copyToMainTable(myTable, table);
}
return table;
}
/**
* Helper method to copy the per-sample table to the main table
*
* @param perSampleTable per-sample table (single dimension)
* @param mainTable main table (two dimensions)
*/
private static void copyToMainTable(final int[] perSampleTable, final int[][] mainTable) {
mainTable[0][0] += perSampleTable[0];
mainTable[0][1] += perSampleTable[1];
mainTable[1][0] += perSampleTable[2];
mainTable[1][1] += perSampleTable[3];
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
* allele1 # #
* allele2 # #
* @return a 2x2 contingency table
*/
private static int[][] getSNPContingencyTable(final Map<String, AlignmentContext> stratifiedContexts,
final Allele ref,
final Allele alt,
final int minQScoreToConsider ) {
int[][] table = new int[2][2];
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final int[] myTable = new int[4];
for (PileupElement p : sample.getValue().getBasePileup()) {
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
continue;
if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
continue;
updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt);
}
if ( passesMinimumThreshold( myTable, MIN_COUNT ) )
copyToMainTable(myTable, table);
}
return table;
}
/**
* Can the base in this pileup element be used in comparative tests?
*
* @param p the pileup element to consider
*
* @return true if this base is part of a meaningful read for comparison, false otherwise
*/
private static boolean isUsableBase(final PileupElement p) {
return !( p.isDeletion() ||
p.getMappingQual() == 0 ||
p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE);
}
private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt) {
final boolean matchesRef = allele.equals(ref, true);
final boolean matchesAlt = allele.equals(alt, true);
if ( matchesRef || matchesAlt ) {
final int offset = matchesRef ? 0 : 2;
if ( read.isStrandless() ) {
// a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1
// (the 1 is to ensure that a strandless read always counts as an observation on both strands, even
// if the read is only seen once, because it's a merged read or other)
table[offset]++;
table[offset + 1]++;
} else {
// a normal read with an actual strand
final boolean isFW = !read.getReadNegativeStrandFlag();
table[offset + (isFW ? 0 : 1)]++;
}
}
}
}

View File

@ -82,7 +82,7 @@ public class StrandBiasBySample extends GenotypeAnnotation {
if ( ! isAppropriateInput(alleleLikelihoodMap, g) )
return;
final int[][] table = FisherStrand.getContingencyTable(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc);
final int[][] table = FisherStrand.getContingencyTable(Collections.singletonMap(g.getSampleName(), alleleLikelihoodMap), vc, 0);
gb.attribute(STRAND_BIAS_BY_SAMPLE_KEY_NAME, FisherStrand.getContingencyArray(table));
}

View File

@ -46,9 +46,17 @@
package org.broadinstitute.gatk.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypesContext;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import java.util.*;
@ -85,6 +93,122 @@ public abstract class StrandBiasTest extends InfoFieldAnnotation {
return ( foundData ? decodeSBBS(sbArray) : null );
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
* allele1 # #
* allele2 # #
* @return a 2x2 contingency table
*/
protected static int[][] getSNPContingencyTable(final Map<String, AlignmentContext> stratifiedContexts,
final Allele ref,
final Allele alt,
final int minQScoreToConsider,
final int minCount ) {
int[][] table = new int[2][2];
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
final int[] myTable = new int[4];
for (PileupElement p : sample.getValue().getBasePileup()) {
if ( ! isUsableBase(p) ) // ignore deletions and bad MQ
continue;
if ( p.getQual() < minQScoreToConsider || p.getMappingQual() < minQScoreToConsider )
continue;
updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt);
}
if ( passesMinimumThreshold( myTable, minCount ) )
copyToMainTable(myTable, table);
}
return table;
}
/**
Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this:
* fw rc
* allele1 # #
* allele2 # #
* @return a 2x2 contingency table
*/
public static int[][] getContingencyTable( final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap,
final VariantContext vc,
final int minCount) {
if( stratifiedPerReadAlleleLikelihoodMap == null ) { throw new IllegalArgumentException("stratifiedPerReadAlleleLikelihoodMap cannot be null"); }
if( vc == null ) { throw new IllegalArgumentException("input vc cannot be null"); }
final Allele ref = vc.getReference();
final Allele alt = vc.getAltAlleleWithHighestAlleleCount();
final int[][] table = new int[2][2];
for (final PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) {
final int[] myTable = new int[4];
for (final Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : maps.getLikelihoodReadMap().entrySet()) {
final MostLikelyAllele mostLikelyAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
final GATKSAMRecord read = el.getKey();
updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt);
}
if ( passesMinimumThreshold(myTable, minCount) )
copyToMainTable(myTable, table);
}
return table;
}
/**
* Helper method to copy the per-sample table to the main table
*
* @param perSampleTable per-sample table (single dimension)
* @param mainTable main table (two dimensions)
*/
private static void copyToMainTable(final int[] perSampleTable, final int[][] mainTable) {
mainTable[0][0] += perSampleTable[0];
mainTable[0][1] += perSampleTable[1];
mainTable[1][0] += perSampleTable[2];
mainTable[1][1] += perSampleTable[3];
}
/**
* Can the base in this pileup element be used in comparative tests?
*
* @param p the pileup element to consider
*
* @return true if this base is part of a meaningful read for comparison, false otherwise
*/
private static boolean isUsableBase(final PileupElement p) {
return !( p.isDeletion() ||
p.getMappingQual() == 0 ||
p.getMappingQual() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE ||
((int) p.getQual()) < QualityUtils.MIN_USABLE_Q_SCORE);
}
private static void updateTable(final int[] table, final Allele allele, final GATKSAMRecord read, final Allele ref, final Allele alt) {
final boolean matchesRef = allele.equals(ref, true);
final boolean matchesAlt = allele.equals(alt, true);
if ( matchesRef || matchesAlt ) {
final int offset = matchesRef ? 0 : 2;
if ( read.isStrandless() ) {
// a strandless read counts as observations on both strand, at 50% weight, with a minimum of 1
// (the 1 is to ensure that a strandless read always counts as an observation on both strands, even
// if the read is only seen once, because it's a merged read or other)
table[offset]++;
table[offset + 1]++;
} else {
// a normal read with an actual strand
final boolean isFW = !read.getReadNegativeStrandFlag();
table[offset + (isFW ? 0 : 1)]++;
}
}
}
/**
* Does this strand data array pass the minimum threshold for inclusion?
*

View File

@ -92,7 +92,22 @@ public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBased
return annotationForOneTable(ratio);
}
}
return null;
if (vc.isSNP() && stratifiedContexts != null) {
final int[][] tableNoFiltering = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount(), -1, MIN_COUNT);
final double ratio = symmetricOddsRatio(tableNoFiltering);
return annotationForOneTable(ratio);
}
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
// either SNP with no alignment context, or indels: per-read likelihood map needed
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc, MIN_COUNT);
final double ratio = symmetricOddsRatio(table);
return annotationForOneTable(ratio);
}
else
// for non-snp variants, we need per-read likelihoods.
// for snps, we can get same result from simple pileup
return null;
}
/**