Added splitContextByReadGroup() and fixed a bug in getPileupForReadGroup() that resulted in an NPE when no reads were present for a read group.

Added doc string for getNBoundRodTracks()

Intermediate commit for CalibrateGenotypeLikelihoods and GenotypeConcordanceTable, so I have a record of my work.  Not ready for public consumption.  Really looking forward to making local commits so I can track my progress without needing to push incomplete functionality up to the server.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5807 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-05-15 17:36:07 +00:00
parent 9423652ad8
commit 75db4705ab
6 changed files with 463 additions and 14 deletions

View File

@ -1,14 +1,19 @@
require("lattice")
require("ggplot2")
require("splines")
READ_DATA = F
if ( READ_DATA ) {
d = read.table("~/Dropbox/Analysis/genotypeAccuracy/cgl.hiseq.table", header=T)
#d = read.table("~/Desktop/broadLocal/GATK/trunk/foo", header=T)
d = subset(read.table("~/Dropbox/Analysis/genotypeAccuracy/cgl.hiseq.table", header=T), rg != "ALL")
d$technology <- factor(1, levels=c("HiSeq-paper", "GA2-1000G", "HiSeq-recent"))
d$technology[grepl("ERR.*", d$rg)] <- "GA2-1000G"
d$technology[grepl("20.*", d$rg)] <- "HiSeq-paper"
d$technology[grepl("B00EG.*", d$rg)] <- "HiSeq-recent"
print(summary(d$technology))
#d = read.table("~/Desktop/broadLocal/GATK/trunk/foo", header=T)
}
moltenCD = d
#moltenCD = melt(d, id.vars=c("comp", "rg"), measure.vars=c("QofAAGivenD", "QofABGivenD", "QofBBGivenD"))
#moltenCD$log10value = round(-10*log10(1-10^moltenCD$value))
@ -22,16 +27,19 @@ genotypeCounts <- function(x) {
addEmpiricalPofG <- function(d) {
r = c()
#
# TODO -- this is a really naive estimate of the accuracy, as it assumes the comp
# track is perfect. In reality the chip is at best Q30 accurate (replicate samples have
# no better than this level of concordance). At low incoming confidence, we can effectively
# ignore this term but when the incoming Q is near or above Q30 this approximation clearly
# breaks down.
#
for ( i in 1:dim(d)[1] ) {
row = d[i,]
#print(row)
if ( row$pGGivenDType == "QofAAGivenD" ) v = row$HOM_REF
if ( row$pGGivenDType == "QofABGivenD" ) v = row$HET
if ( row$pGGivenDType == "QofBBGivenD" ) v = row$HOM_VAR
#print(v)
#print(row$Sum)
r = c(r, v / row$Sum)
#print(r)
}
#print(length(r))
@ -40,13 +48,27 @@ addEmpiricalPofG <- function(d) {
return(d)
}
eByComp <- addEmpiricalPofG(ddply(moltenCD, .(rg, pGGivenDType, pGGivenD), genotypeCounts))
print(subset(eByComp, EmpiricalPofGQ < Inf))
eByComp <- addEmpiricalPofG(ddply(d, .(rg, technology, pGGivenDType, pGGivenD), genotypeCounts))
countsByTech = addEmpiricalPofG(ddply(d, .(technology, pGGivenDType, pGGivenD), genotypeCounts))
print(subset(countsByTech, pGGivenD > 18 & pGGivenD < 22 & pGGivenDType == "QofABGivenD"))
#print(subset(eByComp, EmpiricalPofGQ < Inf))
goodEByComp = subset(eByComp, Sum > 10 & EmpiricalPofGQ < Inf)
print(qplot(pGGivenD, EmpiricalPofGQ, data=subset(goodEByComp, rg != "ALL"), size=log10(Sum), color=pGGivenDType, geom=c("point", "smooth"), group=pGGivenDType, xlim=c(0,40), ylim=c(0,40)) + geom_abline(slope=1, linetype=2))
print(qplot(pGGivenD, EmpiricalPofGQ, data=subset(goodEByComp, rg != "ALL"), facets = . ~ pGGivenDType, color=rg, geom=c("line"), group=rg, xlim=c(0,40), ylim=c(0,40)) + geom_abline(slope=1, linetype=2))
ymax = xmax = 30
print(qplot(pGGivenD, EmpiricalPofGQ, data=goodEByComp, size=log10(Sum), facets = pGGivenDType ~ technology, color=pGGivenDType, geom=c("point", "smooth"), group=pGGivenDType, xlim=c(0,xmax), ylim=c(0,ymax)) + geom_abline(slope=1, linetype=2))
print(qplot(pGGivenD, EmpiricalPofGQ, data=goodEByComp, facets = pGGivenDType ~ technology, color=rg, geom=c("blank"), group=rg, xlim=c(0,xmax), ylim=c(0,ymax))
+ geom_abline(slope=1, linetype=2)
+ geom_smooth(se=F, aes(weight=Sum)))
print(qplot(pGGivenD, pGGivenD - EmpiricalPofGQ, data=goodEByComp, facets = pGGivenDType ~ technology, color=rg, geom=c("blank"), group=rg, xlim=c(0,xmax), ylim=c(-10,10))
+ geom_abline(slope=0, linetype=2)
+ geom_smooth(se=F, method=lm, formula = y ~ ns(x,1), aes(weight=Sum)))
# By tech
print(qplot(pGGivenD, EmpiricalPofGQ, data=goodEByComp, facets = pGGivenDType ~ ., color=technology, geom=c("blank"), group=technology, xlim=c(0,xmax), ylim=c(0,ymax))
+ geom_abline(slope=1, linetype=2)
+ geom_smooth(se=T, size=1.5, aes(weight=Sum)))

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -116,6 +117,25 @@ public class AlignmentContextUtils {
return contexts;
}
/**
* Splits the AlignmentContext into one context per read group
*
* @param context the original pileup
* @return a Map of ReadGroup to AlignmentContext
*
**/
public static Map<SAMReadGroupRecord, AlignmentContext> splitContextByReadGroup(AlignmentContext context, Collection<SAMReadGroupRecord> readGroups) {
HashMap<SAMReadGroupRecord, AlignmentContext> contexts = new HashMap<SAMReadGroupRecord, AlignmentContext>();
for (SAMReadGroupRecord rg : readGroups) {
ReadBackedPileup rgPileup = context.getBasePileup().getPileupForReadGroup(rg.getReadGroupId());
if ( rgPileup != null ) // there we some reads for RG
contexts.put(rg, new AlignmentContext(context.getLocation(), rgPileup));
}
return contexts;
}
public static Map<String, AlignmentContext> splitContextBySampleName(ReadBackedPileup pileup, String assumedSingleSample) {
return splitContextBySampleName(new AlignmentContext(pileup.getLocation(), pileup));
}

View File

@ -162,6 +162,9 @@ public class RefMetaDataTracker {
return bound;
}
/**
* @return the number of ROD bindings (name -> value) where value is not empty in this tracker
*/
public int getNBoundRodTracks() {
return getNBoundRodTracks(null);
}

View File

@ -0,0 +1,267 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.oneoffprojects.walkers;
import net.sf.samtools.SAMReadGroupRecord;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
import org.broad.tribble.util.variantcontext.MutableVariantContext;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFWriter;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import java.io.PrintStream;
import java.util.*;
import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel;
/**
* Validates the calls on a ROD track using a BAM dataset.
*
* @author carneiro
* @since Mar 3, 2011
* @help.summary Validates the calls on a ROD track using a BAM dataset.
*/
@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="alleles",type=VariantContext.class))
@Allows(value={DataSource.READS, DataSource.REFERENCE})
// Ugly fix because RodWalkers don't have access to reads
@By(DataSource.REFERENCE)
@Reference(window=@Window(start=-200,stop=200))
public class CalibrateGenotypeLikelihoods extends RodWalker<CalibrateGenotypeLikelihoods.Data, CalibrateGenotypeLikelihoods.Data> implements TreeReducible<CalibrateGenotypeLikelihoods.Data> {
public static final String COMP_NAME = "alleles";
@Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false)
private int mbq = -1;
@Argument(fullName="maximum_deletion_fraction", shortName="deletions", doc="Maximum deletion fraction for calling a genotype", required=false)
private double deletions = -1;
//@Argument(fullName="standard_min_confidence_threshold_for_calling", shortName="stand_call_conf", doc="the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls", required=false)
private double callConf = 0;
@Output(doc="File to which results should be written",required=true)
protected PrintStream out;
Set<String> samples;
public static class Data {
Collection<Datum> values;
public Data() { this(new LinkedList<Datum>()); }
public Data(Collection<Datum> data) { this.values = data; }
final public static Data EMPTY_DATA = new Data(Collections.<Datum>emptyList());
}
public static class Datum implements Comparable<Datum> {
// todo -- add event type (SNP, indel)
String rgID, sample;
GenotypeLikelihoods pl;
Genotype.Type compType;
@Override
public int compareTo(Datum o) {
int bySample = sample.compareTo(o.sample);
int byRG = rgID.compareTo(o.rgID);
return bySample != 0 ? bySample : byRG;
}
public Datum(String sample, String rgID, GenotypeLikelihoods pl, Genotype.Type compType) {
this.sample = sample;
this.rgID = rgID;
this.pl = pl;
this.compType = compType;
}
}
private UnifiedGenotyperEngine snpEngine;
private UnifiedGenotyperEngine indelEngine;
//---------------------------------------------------------------------------------------------------------------
//
// initialize
//
//---------------------------------------------------------------------------------------------------------------
public void initialize() {
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
logger.info("Samples: " + samples);
List<ReferenceOrderedDataSource> rodList = this.getToolkit().getRodDataSources();
if ( rodList.size() != 1 )
throw new UserException.BadInput("You should provide exactly one genotype VCF");
if ( !rodList.get(0).getName().equals(COMP_NAME))
throw new UserException.BadInput("The ROD track has to be named \""+ COMP_NAME +"\". Not " + rodList.get(0).getName());
// Filling in SNP calling arguments for UG
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
uac.NO_SLOD = true;
if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions;
uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf;
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
// Adding the INDEL calling arguments for UG
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
indelEngine = new UnifiedGenotyperEngine(getToolkit(), uac);
}
//---------------------------------------------------------------------------------------------------------------
//
// map
//
//---------------------------------------------------------------------------------------------------------------
public Data map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
if ( tracker == null || tracker.getNBoundRodTracks() == 0 )
return Data.EMPTY_DATA;
VariantContext vcComp = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger);
if( vcComp == null )
return Data.EMPTY_DATA;
//todo - not sure I want this, may be misleading to filter extended indel events.
if (isInsideExtendedIndel(vcComp, ref))
return Data.EMPTY_DATA;
Data data = new Data();
for ( String sample : samples ) {
Genotype compGT = getGenotype(tracker, ref, sample, COMP_NAME);
if ( compGT == null || compGT.isNoCall() )
continue;
Map<SAMReadGroupRecord,AlignmentContext> byRG = AlignmentContextUtils.splitContextByReadGroup(context, getToolkit().getSAMFileHeader().getReadGroups());
//byRG.put(new SAMReadGroupRecord("ALL"), context);
for ( Map.Entry<SAMReadGroupRecord, AlignmentContext> rgAC : byRG.entrySet() ) {
VariantCallContext call;
if ( vcComp.isIndel() ) {
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, rgAC.getValue());
} else {
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, rgAC.getValue());
}
if ( call == null )
throw new ReviewedStingException("Unexpected genotyping failure " + sample + " at " + ref.getLocus() + " call " + call);
Genotype rgGT = call.getGenotype(sample);
if ( rgGT != null && ! rgGT.isNoCall() && rgGT.getLikelihoods().getAsVector() != null ) {
Datum d = new Datum(sample, rgAC.getKey().getReadGroupId(), rgGT.getLikelihoods(), compGT.getType());
data.values.add(d);
}
}
}
return data;
}
private Genotype getGenotype(RefMetaDataTracker tracker, ReferenceContext ref, String sample, String rod) {
for ( VariantContext vc : tracker.getVariantContexts(ref, rod, null, ref.getLocus(), true, false) ) {
if ( vc.isNotFiltered() && vc.hasGenotype(sample) )
return vc.getGenotype(sample);
else
return null;
}
return null;
}
//---------------------------------------------------------------------------------------------------------------
//
// reduce
//
//---------------------------------------------------------------------------------------------------------------
@Override
public Data reduceInit() {
return new Data();
}
@Override
public Data treeReduce( final Data sum1, final Data sum2) {
sum2.values.addAll(sum1.values);
return sum2;
}
@Override
public Data reduce( final Data mapValue, final Data reduceSum ) {
return treeReduce(mapValue, reduceSum);
}
@Override
public void onTraversalDone(Data data) {
// todo -- print the event type (SNP, indel)
// print the header
List<String> pGNames = Arrays.asList("QofAAGivenD", "QofABGivenD", "QofBBGivenD");
List<String> fields = Arrays.asList("sample", "rg", "pls", "comp", "pGGivenDType", "pGGivenD");
out.println(Utils.join("\t", fields));
double[] counts = new double[]{1, 1, 1};
for ( Datum d : data.values ) { counts[d.compType.ordinal()-1]++; }
double sum = MathUtils.sum(counts);
logger.info(String.format("Types %s %s %s", Genotype.Type.values()[1], Genotype.Type.values()[2], Genotype.Type.values()[3]));
logger.info(String.format("Counts %.0f %.0f %.0f %.0f", counts[0], counts[1], counts[2], sum));
double[] log10priors = new double[]{Math.log10(counts[0] / sum), Math.log10(counts[1] / sum), Math.log10(counts[2] / sum)};
logger.info(String.format("Priors %.2f %.2f %.2f", log10priors[0], log10priors[1], log10priors[2]));
for ( Datum d : data.values ) {
double[] log10pGGivenD = d.pl.getAsVector().clone();
for ( int i = 0; i < log10priors.length; i++ ) log10pGGivenD[i] += log10priors[i];
double[] pOfGGivenD = MathUtils.normalizeFromLog10(log10pGGivenD, false);
for ( int i = 0; i < pGNames.size(); i++ ) {
int q = QualityUtils.probToQual(pOfGGivenD[i], Math.pow(10.0, -9.9));
if ( q > 1 ) { // tons of 1s, and not interesting
out.printf("%s\t%s\t%s\t%s\t%s\t%d%n",
d.sample, d.rgID, d.pl.getAsString(), d.compType.toString(),
pGNames.get(i), q);
}
}
}
}
}

View File

@ -0,0 +1,138 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils;
import java.io.PrintStream;
import java.util.*;
/**
* Emits a table of chrom/pos/sample/GQ/LoDGivenXX for AA, AB, BB/called genotypes for eval and comp for eval and comp VCFs
*/
@Requires(value={})
public class GenotypeConcordanceTable extends RodWalker<Integer, Integer> {
public static final String EVAL_NAME = "eval";
public static final String COMP_NAME = "comp";
@Output(doc="File to which results should be written",required=true)
protected PrintStream out;
@Argument(doc="If provided, we will include information where EVAL is missing and COMP is missing", required=false)
protected boolean keepDoubleMissing = false;
@Argument(doc="If provided, we will include information where EVAL is no-called", required=false)
protected boolean keepEvalNoCall = false;
@Argument(doc="If provided, we will include information where COMP is no-called", required=false)
protected boolean keepCompNoCall = false;
Set<String> samples;
@Override
public void initialize() {
samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), Arrays.asList(EVAL_NAME));
logger.info("Samples: " + samples);
List<String> fields = Arrays.asList("chrom", "pos", "sample", "GQ", "LofDGivenAA", "LofDGivenAB", "LofDGivenBB", "concordant", "concordantInt", EVAL_NAME, COMP_NAME);
out.println(Utils.join("\t", fields));
}
@Override
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null )
return 0;
if ( tracker.getNBoundRodTracks() > 0 ) {
for ( String sample : samples ) {
Genotype evalGT = getGenotype(tracker, ref, sample, EVAL_NAME);
Genotype compGT = getGenotype(tracker, ref, sample, COMP_NAME);
if ( evalGT == null && compGT == null && ! keepDoubleMissing )
continue;
if ( (evalGT == null || evalGT.isNoCall()) && ! keepEvalNoCall )
continue;
if ( (compGT == null || compGT.isNoCall()) && ! keepCompNoCall )
continue;
out.printf("%s\t%s\t%s\t", ref.getLocus().getContig(), ref.getLocus().getStart(), sample);
String evalGQ = evalGT == null ? "NA" : String.format("%d", (int)(10*evalGT.getNegLog10PError()));
String LofDGivenAA = "-1", LofDGivenAB = "-1", LofDGivenBB = "-1";
if ( evalGT != null ) {
double[] pls = evalGT.getLikelihoods().getAsVector();
if ( pls != null ) { // not missing
LofDGivenAA = String.format("%.0f", pls[0]);
LofDGivenAB = String.format("%.0f", pls[1]);
LofDGivenBB = String.format("%.0f", pls[2]);
}
}
String concordance = evalGT == null || compGT == null ? "NA" : String.format("%s", evalGT.getType() == compGT.getType());
String concordanceInt = Integer.toString(evalGT == null || compGT == null ? -1 : (evalGT.getType() == compGT.getType() ? 1 : 0));
String evalType = evalGT == null ? "MISSING" : evalGT.getType().toString();
String compType = compGT == null ? "MISSING" : compGT.getType().toString();
out.println(Utils.join("\t", Arrays.asList(evalGQ, LofDGivenAA, LofDGivenAB, LofDGivenBB, concordance, concordanceInt, evalType, compType)));
}
}
return 1;
}
private Genotype getGenotype(RefMetaDataTracker tracker, ReferenceContext ref, String sample, String rod) {
for ( VariantContext vc : tracker.getVariantContexts(ref, rod, null, ref.getLocus(), true, false) ) {
if ( vc.isNotFiltered() && vc.hasGenotype(sample) )
return vc.getGenotype(sample);
else
return null;
}
return null;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer counter, Integer sum) {
return counter + sum;
}
@Override
public void onTraversalDone(Integer sum) {}
}

View File

@ -501,8 +501,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
if(pileup != null)
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
}
else {
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();