diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java
index e52c58187..eb8b17ee4 100644
--- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java
+++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/FisherStrand.java
@@ -183,7 +183,7 @@ public class FisherStrand extends StrandBiasTest implements StandardAnnotation,
return list;
}
- private Double pValueForContingencyTable(int[][] originalTable) {
+ public static Double pValueForContingencyTable(int[][] originalTable) {
final int[][] normalizedTable = normalizeContingencyTable(originalTable);
int[][] table = copyContingencyTable(normalizedTable);
diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java
new file mode 100644
index 000000000..f1f9c55d8
--- /dev/null
+++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounter.java
@@ -0,0 +1,286 @@
+/*
+* Copyright (c) 2012 The Broad Institute
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package org.broadinstitute.gatk.tools.walkers.rnaseq;
+
+import htsjdk.variant.variantcontext.VariantContext;
+import org.broadinstitute.gatk.engine.filters.DuplicateReadFilter;
+import org.broadinstitute.gatk.engine.walkers.DisabledReadFilters;
+import org.broadinstitute.gatk.engine.walkers.Downsample;
+import org.broadinstitute.gatk.engine.walkers.LocusWalker;
+import org.broadinstitute.gatk.tools.walkers.coverage.CoverageUtils;
+import org.broadinstitute.gatk.utils.commandline.*;
+import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
+import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
+import org.broadinstitute.gatk.utils.downsampling.DownsampleType;
+import org.broadinstitute.gatk.utils.exceptions.UserException;
+import org.broadinstitute.gatk.utils.pileup.PileupElement;
+import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
+import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
+
+import java.io.PrintStream;
+import java.util.List;
+
+/**
+ * Calculated allele counts in specific loci, to allow allele specific expression analysis.
+ *
+ *
+ * Calculated allele counts in specific loci after filtering based on mapping quality, base quality, depth of coverage, overlapping paired reads and deletion on the position.
+ * All thresholds and option can be set as command line arguments.
+ * The output of this tool is a tab delimited txt file, compatible with Mamba, a downstream tool (http://www.well.ox.ac.uk/~rivas/mamba/).
+ * A user can use -drf DuplicateRead to allow the tool to count also duplicated reads (a useful option for RNAseq data tools)
+ *
+ *
+ * Input
+ *
+ * BAM files (with proper headers) to be analyzed for ASE *
+ *
+ * a VCF file with specific sites to process
+ *
+ *
+ * Output
+ *
+ * Table (tab delimited file) with the specific allele counts
+ *
+ *
+ * Examples
+ *
+ * java -jar GenomeAnalysisTK.jar \
+ * -R ref.fasta \
+ * -T ASEReadCounter \
+ * -o file_name \
+ * -I input.bam \
+ * -sites sites.vcf
+ * -U ALLOW_N_CIGAR_READS
+ * [-minDepth 10] \
+ * [--minMappingQuality 10] \
+ * [--minBaseQuality 2]
+ * [-drf DuplicateRead]
+ *
+ */
+@Downsample(by = DownsampleType.BY_SAMPLE, toCoverage = 10000)
+//@DisabledReadFilters({DuplicateReadFilter.class}) //currently can be disabled using the command line argument -drf DuplicateRead
+public class ASEReadCounter extends LocusWalker {
+
+ @Output
+ public PrintStream out;
+
+ @Input (fullName = "sitesVCFFile",shortName = "sites")
+ public RodBinding sites;
+
+ /**
+ * Loci with total depth lower then this threshold will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold.
+ */
+ @Argument(fullName = "minDepthOfNonFilteredBase", shortName = "minDepth", doc = "Minimum number of filtered-pass bases that we need to see for reporting this site", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
+ public int minDepthOfNonFilteredBases = -1;
+
+ /**
+ * Reads with mapping quality values lower than this threshold will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold.
+ */
+ @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth", required = false, minValue = 0, maxValue = Integer.MAX_VALUE)
+ public int minMappingQuality = 0;
+
+ /**
+ * Bases with quality scores lower than this threshold will be skipped. This is set to -1 by default to disable the evaluation and ignore this threshold.
+ */
+ @Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth", required = false, minValue = 0, maxValue = Byte.MAX_VALUE)
+ public byte minBaseQuality = 0;
+
+ /**
+ * There are few option to deal with an overlapping read pair.
+ * COUNT_READS - Count all reads independently (even if from the same fragment).
+ * COUNT_FRAGMENTS - Count all fragments (even if the reads that compose the fragment are not consistent at that base).
+ * COUNT_FRAGMENTS_REQUIRE_SAME_BASEQUIRE_SAME_BASE - Count all fragments (but only if the reads that compose the fragment are consistent at that base).
+ * defaults to COUNT_FRAGMENTS_REQUIRE_SAME_BASEQUIRE_SAME_BASE
+ */
+ @Argument(fullName = "countOverlapReadsType", shortName = "overlap", doc = "How should overlapping reads from the same fragment be handled. Current options: COUNT_READS, COUNT_FRAGMENTS and COUNT_FRAGMENTS_REQUIRE_SAME_BASEQUIRE_SAME_BASE (default)", required = false)
+ public CoverageUtils.CountPileupType countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS_REQUIRE_SAME_BASE;
+
+ /**
+ * Output file format (e.g. csv, table, rtable); defaults to r-readable table.
+ */
+ @Argument(fullName = "outputFormat", doc = "The format of the output file, can be csv, table, rtable", required = false)
+ public String outputFormat = "rtable";
+
+ /**
+ * Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output.
+ */
+ @Advanced
+ @Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false)
+ public boolean includeDeletions = false;
+
+ @Advanced
+ @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false)
+ public boolean ignoreDeletionSites = false;
+
+ final String[] OUTPUT_FORMATS = {"table","rtable","csv"};
+ public String separator = "\t";
+
+ ////////////////////////////////////////////////////////////////////////////////////
+ // STANDARD WALKER METHODS
+ ////////////////////////////////////////////////////////////////////////////////////
+
+ public boolean includeReadsWithDeletionAtLoci() { return includeDeletions && ! ignoreDeletionSites; }
+
+ public void initialize() {
+
+ // Check the output format
+ boolean goodOutputFormat = false;
+ for ( final String f : OUTPUT_FORMATS ) {
+ goodOutputFormat = goodOutputFormat || f.equals(outputFormat);
+ }
+
+ if ( ! goodOutputFormat ) {
+ throw new IllegalArgumentException("Improper output format. Can be one of table,rtable,csv. Was "+outputFormat);
+ }
+
+ if ( outputFormat.equals("csv") ) {
+ separator = ",";
+ }
+ final String header = "contig"+separator+"position"+separator+"variantID"+separator+"refAllele"+separator+"altAllele"+separator+"refCount"+separator+"altCount"+separator+"totalCount"+separator+"lowMAPQDepth"+separator+"lowBaseQDepth"+separator+"rawDepth"+separator+"otherBases"+separator+"improperPairs";
+ out.println(header);
+
+ }
+
+
+ @Override
+ public String map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
+ if ( tracker == null )
+ return null;
+ final String contig = context.getLocation().getContig();
+ final long position = context.getPosition();
+
+ final char refAllele = (char)ref.getBase();
+
+ final List VCs = tracker.getValues(sites, context.getLocation());
+ if(VCs != null && VCs.size() > 1)
+ throw new UserException("more then one variant context in position: "+contig+":"+position);
+ if(VCs == null || VCs.size() == 0)
+ return null;
+
+ final VariantContext vc = VCs.get(0);
+ if(!vc.isBiallelic()) {
+ logger.warn("ignore site: cannot run ASE on non-biallelic sites: " + vc.toString());
+ return null;
+ }
+
+ final char altAllele = (char)vc.getAlternateAllele(0).getBases()[0];
+ final String siteID = vc.getID();
+ final ReadBackedPileup pileup = filterPileup(context.getBasePileup(), countType, includeReadsWithDeletionAtLoci());
+
+ // count up the depths of all and QC+ bases
+ return calculateLineForSite(pileup, contig, position, siteID, refAllele, altAllele);
+
+ }
+
+ protected ReadBackedPileup filterPileup(final ReadBackedPileup originalPileup, final CoverageUtils.CountPileupType countType, final boolean includeDeletions){
+
+ ReadBackedPileup pileupWithDeletions;
+ if(countType.equals(CoverageUtils.CountPileupType.COUNT_FRAGMENTS_REQUIRE_SAME_BASE))
+ pileupWithDeletions = originalPileup.getOverlappingFragmentFilteredPileup(true,true);
+ else if(countType.equals(CoverageUtils.CountPileupType.COUNT_READS))
+ pileupWithDeletions = originalPileup;
+ else if(countType.equals(CoverageUtils.CountPileupType.COUNT_FRAGMENTS))
+ pileupWithDeletions = originalPileup.getOverlappingFragmentFilteredPileup(false,true);
+ else
+ throw new UserException("Must use valid CountPileupType");
+
+ return includeDeletions ? pileupWithDeletions: pileupWithDeletions.getPileupWithoutDeletions();
+
+ }
+
+ protected String calculateLineForSite(final ReadBackedPileup pileup, final String contig, final long position, final String siteID, final char refAllele, final char altAllele){
+
+ int rawDepth = 0, lowBaseQDepth = 0, lowMAPQDepth = 0, refCount = 0, altCount = 0, totalNonFilteredCount = 0, otherBasesCount = 0, improperPairsCount = 0 ;
+
+ for (final PileupElement base : pileup) {
+ rawDepth++;
+
+ if (!base.getRead().getProperPairFlag()){
+ improperPairsCount++;
+ continue;
+ }
+ if (base.getMappingQual() < minMappingQuality) {
+ lowMAPQDepth++;
+ continue;
+ }
+
+ if (base.getQual() < minBaseQuality) {
+ lowBaseQDepth++;
+ continue;
+ }
+
+ if(base.getBase() == refAllele)
+ refCount++;
+ else if(base.getBase() == altAllele)
+ altCount++;
+ else {
+ otherBasesCount++;
+ continue;
+ }
+ totalNonFilteredCount++;
+ }
+
+ if(totalNonFilteredCount < minDepthOfNonFilteredBases)
+ return null;
+
+ return contig +separator+
+ position +separator+
+ siteID +separator+
+ refAllele +separator+
+ altAllele +separator+
+ refCount +separator+
+ altCount +separator+
+ totalNonFilteredCount +separator+
+ lowMAPQDepth +separator+
+ lowBaseQDepth +separator+
+ rawDepth +separator+
+ otherBasesCount +separator+
+ improperPairsCount;
+ }
+
+ @Override
+ public Integer reduceInit() {
+ return 0;
+ }
+
+ @Override
+ public Integer reduce(String results, Integer sum) {
+ if(results!= null)
+ out.println(results);
+ return ++sum;
+ }
+
+ @Override
+ public void onTraversalDone(Integer sum) {
+ logger.info("Done processing "+sum+" loci");
+ out.close();
+ }
+
+
+
+
+
+}
diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounterIntegrationTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounterIntegrationTest.java
new file mode 100644
index 000000000..238ce93c7
--- /dev/null
+++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/tools/walkers/rnaseq/ASEReadCounterIntegrationTest.java
@@ -0,0 +1,112 @@
+/*
+* Copyright (c) 2012 The Broad Institute
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package org.broadinstitute.gatk.tools.walkers.rnaseq;
+
+import org.broadinstitute.gatk.engine.walkers.WalkerTest;
+import org.testng.annotations.Test;
+
+import java.util.Arrays;
+
+/**
+ * Test all different parameters.
+ */
+public class ASEReadCounterIntegrationTest extends WalkerTest {
+
+ @Test
+ public void testASEReadCounterWithHighMQ() {
+ WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+ "-T ASEReadCounter -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -sites "+privateTestDir +"NA12878.chr20_2444518_2637800.RNAseq.SYNONYMOUS_CODING.vcf -mmq 60 -o %s -U ALLOW_N_CIGAR_READS", 1,
+ Arrays.asList("eec421405a4a570751821d158734020e"));
+ executeTest("test high mq with no read passing", spec);
+ }
+
+ @Test
+ public void testASEReadCounterWithLowMQ() {
+ WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+ "-T ASEReadCounter -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -sites "+privateTestDir +"NA12878.chr20_2444518_2637800.RNAseq.SYNONYMOUS_CODING.vcf -mmq 1 -o %s -U ALLOW_N_CIGAR_READS", 1,
+ Arrays.asList("4f1144be0bb2c4adeae00625afd04cda"));
+ executeTest("test high mq with no read passing", spec);
+ }
+
+ @Test
+ public void testASEReadCounterWithLowMQNoDedup() {
+ WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+ "-T ASEReadCounter -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -sites "+privateTestDir +"NA12878.chr20_2444518_2637800.RNAseq.SYNONYMOUS_CODING.vcf -mmq 10 -o %s -U ALLOW_N_CIGAR_READS -drf DuplicateRead", 1,
+ Arrays.asList("226021673310f28d6520d7f3cfe8cb4b"));
+ executeTest("test high mq with no read passing", spec);
+ }
+
+ @Test
+ public void testASEReadCounterWithHighMQLowBQ() {
+ WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+ "-T ASEReadCounter -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -sites "+privateTestDir +"NA12878.chr20_2444518_2637800.RNAseq.SYNONYMOUS_CODING.vcf -mmq 60 -mbq 10 -o %s -U ALLOW_N_CIGAR_READS", 1,
+ Arrays.asList("f86bf14ca3a2cc0114d6a11de0cd9448"));
+ executeTest("test high mq with no read passing", spec);
+ }
+
+ @Test
+ public void testASEReadCounterWithCountOverlaps() {
+ WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+ "-T ASEReadCounter -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -sites "+privateTestDir +"NA12878.chr20_2444518_2637800.RNAseq.SYNONYMOUS_CODING.vcf -mmq 60 -mbq 10 -o %s -U ALLOW_N_CIGAR_READS -overlap COUNT_FRAGMENTS", 1,
+ Arrays.asList("bfaeaaa8c000eca82f703225bf2257a1"));
+ executeTest("test high mq with no read passing", spec);
+ }
+
+ @Test
+ public void testASEReadCounterWithCountReads() {
+ WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+ "-T ASEReadCounter -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -sites "+privateTestDir +"NA12878.chr20_2444518_2637800.RNAseq.SYNONYMOUS_CODING.vcf -mmq 60 -mbq 10 -o %s -U ALLOW_N_CIGAR_READS -overlap COUNT_READS", 1,
+ Arrays.asList("a9b420fd12a9162b31a48842c4081a1f"));
+ executeTest("test high mq with no read passing", spec);
+ }
+
+
+ @Test
+ public void testASEReadCounterMinDepth70() {
+ WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+ "-T ASEReadCounter -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -sites "+privateTestDir +"NA12878.chr20_2444518_2637800.RNAseq.SYNONYMOUS_CODING.vcf -mmq 60 -mbq 10 -o %s -U ALLOW_N_CIGAR_READS -minDepth 70", 1,
+ Arrays.asList("79b99bc8a1c1c58ac860f79a11f93086"));
+ executeTest("test high mq with no read passing", spec);
+ }
+
+ @Test
+ public void testASEReadCounterFileFormat() {
+ WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+ "-T ASEReadCounter -R " + b37KGReference + " -I " + privateTestDir + "NA12878.RNAseq.bam -sites "+privateTestDir +"NA12878.chr20_2444518_2637800.RNAseq.SYNONYMOUS_CODING.vcf -mmq 60 -mbq 10 -o %s -U ALLOW_N_CIGAR_READS --outputFormat csv", 1,
+ Arrays.asList("2c7c531018ab353e6874ee2da7980986"));
+ executeTest("test high mq with no read passing", spec);
+ }
+
+
+
+
+
+
+
+
+
+
+}