From 75db4705abfe2402fac8aaeef6879389e2b1b78e Mon Sep 17 00:00:00 2001 From: depristo Date: Sun, 15 May 2011 17:36:07 +0000 Subject: [PATCH] Added splitContextByReadGroup() and fixed bug in getPileupForReadGroup() that resulted in a NPE when no reads where present for a read group. Added doc string for getNBoundRodTracks() Intermediate commit for CalibrateGenotypeLikelihoods and GenotypeConcordanceTable, so I have a record of my work. Not ready for public consumption. Really looking forward to making local commits so I can track my progress without needing to push incomplete functionality up to the server. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5807 348d0f76-0448-11de-a6fe-93d51630548a --- analysis/depristo/genotypeAccuracy/commands.R | 46 ++- .../gatk/contexts/AlignmentContextUtils.java | 20 ++ .../gatk/refdata/RefMetaDataTracker.java | 3 + .../walkers/CalibrateGenotypeLikelihoods.java | 267 ++++++++++++++++++ .../walkers/GenotypeConcordanceTable.java | 138 +++++++++ .../pileup/AbstractReadBackedPileup.java | 3 +- 6 files changed, 463 insertions(+), 14 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/oneoffprojects/walkers/CalibrateGenotypeLikelihoods.java create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeConcordanceTable.java diff --git a/analysis/depristo/genotypeAccuracy/commands.R b/analysis/depristo/genotypeAccuracy/commands.R index 1b684c059..4d25f6fc0 100644 --- a/analysis/depristo/genotypeAccuracy/commands.R +++ b/analysis/depristo/genotypeAccuracy/commands.R @@ -1,14 +1,19 @@ require("lattice") require("ggplot2") +require("splines") READ_DATA = F if ( READ_DATA ) { - d = read.table("~/Dropbox/Analysis/genotypeAccuracy/cgl.hiseq.table", header=T) - #d = read.table("~/Desktop/broadLocal/GATK/trunk/foo", header=T) + d = subset(read.table("~/Dropbox/Analysis/genotypeAccuracy/cgl.hiseq.table", header=T), rg != "ALL") + d$technology <- factor(1, levels=c("HiSeq-paper", "GA2-1000G", "HiSeq-recent")) + d$technology[grepl("ERR.*", d$rg)] <- "GA2-1000G" + d$technology[grepl("20.*", d$rg)] <- "HiSeq-paper" + d$technology[grepl("B00EG.*", d$rg)] <- "HiSeq-recent" + print(summary(d$technology)) +#d = read.table("~/Desktop/broadLocal/GATK/trunk/foo", header=T) } -moltenCD = d #moltenCD = melt(d, id.vars=c("comp", "rg"), measure.vars=c("QofAAGivenD", "QofABGivenD", "QofBBGivenD")) #moltenCD$log10value = round(-10*log10(1-10^moltenCD$value)) @@ -22,16 +27,19 @@ genotypeCounts <- function(x) { addEmpiricalPofG <- function(d) { r = c() + # + # TODO -- this is a really naive estimate of the accuracy, as it assumes the comp + # track is perfect. In reality the chip is at best Q30 accurate (replicate samples have + # level than this level of concordance). At low incoming confidence, we can effectively + # ignore this term but when the incoming Q is near or above Q30 this approximation clearly + # breaks down. + # for ( i in 1:dim(d)[1] ) { row = d[i,] - #print(row) if ( row$pGGivenDType == "QofAAGivenD" ) v = row$HOM_REF if ( row$pGGivenDType == "QofABGivenD" ) v = row$HET if ( row$pGGivenDType == "QofBBGivenD" ) v = row$HOM_VAR - #print(v) - #print(row$Sum) r = c(r, v / row$Sum) - #print(r) } #print(length(r)) @@ -40,13 +48,27 @@ addEmpiricalPofG <- function(d) { return(d) } -eByComp <- addEmpiricalPofG(ddply(moltenCD, .(rg, pGGivenDType, pGGivenD), genotypeCounts)) -print(subset(eByComp, EmpiricalPofGQ < Inf)) +eByComp <- addEmpiricalPofG(ddply(d, .(rg, technology, pGGivenDType, pGGivenD), genotypeCounts)) +countsByTech = addEmpiricalPofG(ddply(d, .(technology, pGGivenDType, pGGivenD), genotypeCounts)) +print(subset(countsByTech, pGGivenD > 18 & pGGivenD < 22 & pGGivenDType == "QofABGivenD")) +#print(subset(eByComp, EmpiricalPofGQ < Inf)) goodEByComp = subset(eByComp, Sum > 10 & EmpiricalPofGQ < Inf) -print(qplot(pGGivenD, EmpiricalPofGQ, data=subset(goodEByComp, rg != "ALL"), size=log10(Sum), color=pGGivenDType, geom=c("point", "smooth"), group=pGGivenDType, xlim=c(0,40), ylim=c(0,40)) + geom_abline(slope=1, linetype=2)) -print(qplot(pGGivenD, EmpiricalPofGQ, data=subset(goodEByComp, rg != "ALL"), facets = . ~ pGGivenDType, color=rg, geom=c("line"), group=rg, xlim=c(0,40), ylim=c(0,40)) + geom_abline(slope=1, linetype=2)) - +ymax = xmax = 30 +print(qplot(pGGivenD, EmpiricalPofGQ, data=goodEByComp, size=log10(Sum), facets = pGGivenDType ~ technology, color=pGGivenDType, geom=c("point", "smooth"), group=pGGivenDType, xlim=c(0,xmax), ylim=c(0,ymax)) + geom_abline(slope=1, linetype=2)) + +print(qplot(pGGivenD, EmpiricalPofGQ, data=goodEByComp, facets = pGGivenDType ~ technology, color=rg, geom=c("blank"), group=rg, xlim=c(0,xmax), ylim=c(0,ymax)) + + geom_abline(slope=1, linetype=2) + + geom_smooth(se=F, aes(weight=Sum))) + +print(qplot(pGGivenD, pGGivenD - EmpiricalPofGQ, data=goodEByComp, facets = pGGivenDType ~ technology, color=rg, geom=c("blank"), group=rg, xlim=c(0,xmax), ylim=c(-10,10)) + + geom_abline(slope=0, linetype=2) + + geom_smooth(se=F, method=lm, formula = y ~ ns(x,1), aes(weight=Sum))) + +# By tech +print(qplot(pGGivenD, EmpiricalPofGQ, data=goodEByComp, facets = pGGivenDType ~ ., color=technology, geom=c("blank"), group=technology, xlim=c(0,xmax), ylim=c(0,ymax)) ++ geom_abline(slope=1, linetype=2) ++ geom_smooth(se=T, size=1.5, aes(weight=Sum))) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java index 0cacf0dff..3a96b23e2 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContextUtils.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.contexts; +import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.GenomeLoc; @@ -116,6 +117,25 @@ public class AlignmentContextUtils { return contexts; } + /** + * Splits the AlignmentContext into one context per read group + * + * @param context the original pileup + * @return a Map of ReadGroup to AlignmentContext + * + **/ + public static Map splitContextByReadGroup(AlignmentContext context, Collection readGroups) { + HashMap contexts = new HashMap(); + + for (SAMReadGroupRecord rg : readGroups) { + ReadBackedPileup rgPileup = context.getBasePileup().getPileupForReadGroup(rg.getReadGroupId()); + if ( rgPileup != null ) // there we some reads for RG + contexts.put(rg, new AlignmentContext(context.getLocation(), rgPileup)); + } + + return contexts; + } + public static Map splitContextBySampleName(ReadBackedPileup pileup, String assumedSingleSample) { return splitContextBySampleName(new AlignmentContext(pileup.getLocation(), pileup)); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index d5063d643..239877ccf 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -162,6 +162,9 @@ public class RefMetaDataTracker { return bound; } + /** + * @return the number of ROD bindings (name -> value) where value is not empty in this tracker + */ public int getNBoundRodTracks() { return getNBoundRodTracks(null); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CalibrateGenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CalibrateGenotypeLikelihoods.java new file mode 100644 index 000000000..4f0a6287b --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CalibrateGenotypeLikelihoods.java @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.oneoffprojects.walkers; + +import net.sf.samtools.SAMReadGroupRecord; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.GenotypeLikelihoods; +import org.broad.tribble.util.variantcontext.MutableVariantContext; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFWriter; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.genotyper.*; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.vcf.VCFUtils; + +import java.io.PrintStream; +import java.util.*; + +import static org.broadinstitute.sting.utils.IndelUtils.isInsideExtendedIndel; + +/** + * Validates the calls on a ROD track using a BAM dataset. + * + * @author carneiro + * @since Mar 3, 2011 + * @help.summary Validates the calls on a ROD track using a BAM dataset. + */ + +@Requires(value={DataSource.READS, DataSource.REFERENCE},referenceMetaData=@RMD(name="alleles",type=VariantContext.class)) +@Allows(value={DataSource.READS, DataSource.REFERENCE}) + +// Ugly fix because RodWalkers don't have access to reads +@By(DataSource.REFERENCE) +@Reference(window=@Window(start=-200,stop=200)) +public class CalibrateGenotypeLikelihoods extends RodWalker implements TreeReducible { + public static final String COMP_NAME = "alleles"; + + @Argument(fullName="minimum_base_quality_score", shortName="mbq", doc="Minimum base quality score for calling a genotype", required=false) + private int mbq = -1; + + @Argument(fullName="maximum_deletion_fraction", shortName="deletions", doc="Maximum deletion fraction for calling a genotype", required=false) + private double deletions = -1; + + //@Argument(fullName="standard_min_confidence_threshold_for_calling", shortName="stand_call_conf", doc="the minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls", required=false) + private double callConf = 0; + + @Output(doc="File to which results should be written",required=true) + protected PrintStream out; + + Set samples; + + public static class Data { + Collection values; + + public Data() { this(new LinkedList()); } + public Data(Collection data) { this.values = data; } + + final public static Data EMPTY_DATA = new Data(Collections.emptyList()); + } + + public static class Datum implements Comparable { + // todo -- add event type (SNP, indel) + String rgID, sample; + GenotypeLikelihoods pl; + Genotype.Type compType; + + @Override + public int compareTo(Datum o) { + int bySample = sample.compareTo(o.sample); + int byRG = rgID.compareTo(o.rgID); + return bySample != 0 ? bySample : byRG; + } + + public Datum(String sample, String rgID, GenotypeLikelihoods pl, Genotype.Type compType) { + this.sample = sample; + this.rgID = rgID; + this.pl = pl; + this.compType = compType; + } + } + + private UnifiedGenotyperEngine snpEngine; + private UnifiedGenotyperEngine indelEngine; + + //--------------------------------------------------------------------------------------------------------------- + // + // initialize + // + //--------------------------------------------------------------------------------------------------------------- + + public void initialize() { + samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + logger.info("Samples: " + samples); + + List rodList = this.getToolkit().getRodDataSources(); + if ( rodList.size() != 1 ) + throw new UserException.BadInput("You should provide exactly one genotype VCF"); + if ( !rodList.get(0).getName().equals(COMP_NAME)) + throw new UserException.BadInput("The ROD track has to be named \""+ COMP_NAME +"\". Not " + rodList.get(0).getName()); + + // Filling in SNP calling arguments for UG + UnifiedArgumentCollection uac = new UnifiedArgumentCollection(); + uac.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; + uac.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; + uac.NO_SLOD = true; + + if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq; + if (deletions >= 0) uac.MAX_DELETION_FRACTION = deletions; + + uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf; + + uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP; + snpEngine = new UnifiedGenotyperEngine(getToolkit(), uac); + + // Adding the INDEL calling arguments for UG + uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL; + indelEngine = new UnifiedGenotyperEngine(getToolkit(), uac); + } + + //--------------------------------------------------------------------------------------------------------------- + // + // map + // + //--------------------------------------------------------------------------------------------------------------- + public Data map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { + if ( tracker == null || tracker.getNBoundRodTracks() == 0 ) + return Data.EMPTY_DATA; + + VariantContext vcComp = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger); + if( vcComp == null ) + return Data.EMPTY_DATA; + + //todo - not sure I want this, may be misleading to filter extended indel events. + if (isInsideExtendedIndel(vcComp, ref)) + return Data.EMPTY_DATA; + + Data data = new Data(); + for ( String sample : samples ) { + Genotype compGT = getGenotype(tracker, ref, sample, COMP_NAME); + if ( compGT == null || compGT.isNoCall() ) + continue; + + Map byRG = AlignmentContextUtils.splitContextByReadGroup(context, getToolkit().getSAMFileHeader().getReadGroups()); + //byRG.put(new SAMReadGroupRecord("ALL"), context); + for ( Map.Entry rgAC : byRG.entrySet() ) { + VariantCallContext call; + if ( vcComp.isIndel() ) { + call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, rgAC.getValue()); + } else { + call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, rgAC.getValue()); + } + + if ( call == null ) + throw new ReviewedStingException("Unexpected genotyping failure " + sample + " at " + ref.getLocus() + " call " + call); + + Genotype rgGT = call.getGenotype(sample); + + if ( rgGT != null && ! rgGT.isNoCall() && rgGT.getLikelihoods().getAsVector() != null ) { + Datum d = new Datum(sample, rgAC.getKey().getReadGroupId(), rgGT.getLikelihoods(), compGT.getType()); + data.values.add(d); + } + } + } + + return data; + } + + private Genotype getGenotype(RefMetaDataTracker tracker, ReferenceContext ref, String sample, String rod) { + for ( VariantContext vc : tracker.getVariantContexts(ref, rod, null, ref.getLocus(), true, false) ) { + if ( vc.isNotFiltered() && vc.hasGenotype(sample) ) + return vc.getGenotype(sample); + else + return null; + } + + return null; + } + + //--------------------------------------------------------------------------------------------------------------- + // + // reduce + // + //--------------------------------------------------------------------------------------------------------------- + + @Override + public Data reduceInit() { + return new Data(); + } + + @Override + public Data treeReduce( final Data sum1, final Data sum2) { + sum2.values.addAll(sum1.values); + return sum2; + } + + @Override + public Data reduce( final Data mapValue, final Data reduceSum ) { + return treeReduce(mapValue, reduceSum); + } + + @Override + public void onTraversalDone(Data data) { + // todo -- print the event type (SNP, indel) + + // print the header + List pGNames = Arrays.asList("QofAAGivenD", "QofABGivenD", "QofBBGivenD"); + List fields = Arrays.asList("sample", "rg", "pls", "comp", "pGGivenDType", "pGGivenD"); + out.println(Utils.join("\t", fields)); + + double[] counts = new double[]{1, 1, 1}; + for ( Datum d : data.values ) { counts[d.compType.ordinal()-1]++; } + double sum = MathUtils.sum(counts); + logger.info(String.format("Types %s %s %s", Genotype.Type.values()[1], Genotype.Type.values()[2], Genotype.Type.values()[3])); + logger.info(String.format("Counts %.0f %.0f %.0f %.0f", counts[0], counts[1], counts[2], sum)); + double[] log10priors = new double[]{Math.log10(counts[0] / sum), Math.log10(counts[1] / sum), Math.log10(counts[2] / sum)}; + logger.info(String.format("Priors %.2f %.2f %.2f", log10priors[0], log10priors[1], log10priors[2])); + + for ( Datum d : data.values ) { + double[] log10pGGivenD = d.pl.getAsVector().clone(); + for ( int i = 0; i < log10priors.length; i++ ) log10pGGivenD[i] += log10priors[i]; + double[] pOfGGivenD = MathUtils.normalizeFromLog10(log10pGGivenD, false); + for ( int i = 0; i < pGNames.size(); i++ ) { + int q = QualityUtils.probToQual(pOfGGivenD[i], Math.pow(10.0, -9.9)); + if ( q > 1 ) { // tons of 1s, and not interesting + out.printf("%s\t%s\t%s\t%s\t%s\t%d%n", + d.sample, d.rgID, d.pl.getAsString(), d.compType.toString(), + pGNames.get(i), q); + } + } + } + } +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeConcordanceTable.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeConcordanceTable.java new file mode 100755 index 000000000..cfe7fb9a3 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/GenotypeConcordanceTable.java @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.Utils; + +import java.io.PrintStream; +import java.util.*; + +/** + * Emits a table of chrom/pos/sample/GQ/LoDGivenXX for AA, AB, BB/called genotypes for eval and comp for eval and comp VCFs + */ +@Requires(value={}) +public class GenotypeConcordanceTable extends RodWalker { + public static final String EVAL_NAME = "eval"; + public static final String COMP_NAME = "comp"; + + @Output(doc="File to which results should be written",required=true) + protected PrintStream out; + + @Argument(doc="If provided, we will include information where EVAL is missing and COMP is missing", required=false) + protected boolean keepDoubleMissing = false; + + @Argument(doc="If provided, we will include information where EVAL is no-called", required=false) + protected boolean keepEvalNoCall = false; + + @Argument(doc="If provided, we will include information where COMP is no-called", required=false) + protected boolean keepCompNoCall = false; + + Set samples; + + @Override + public void initialize() { + samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), Arrays.asList(EVAL_NAME)); + logger.info("Samples: " + samples); + List fields = Arrays.asList("chrom", "pos", "sample", "GQ", "LofDGivenAA", "LofDGivenAB", "LofDGivenBB", "concordant", "concordantInt", EVAL_NAME, COMP_NAME); + out.println(Utils.join("\t", fields)); + } + + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) + return 0; + + if ( tracker.getNBoundRodTracks() > 0 ) { + for ( String sample : samples ) { + Genotype evalGT = getGenotype(tracker, ref, sample, EVAL_NAME); + Genotype compGT = getGenotype(tracker, ref, sample, COMP_NAME); + + if ( evalGT == null && compGT == null && ! keepDoubleMissing ) + continue; + + if ( (evalGT == null || evalGT.isNoCall()) && ! keepEvalNoCall ) + continue; + + if ( (compGT == null || compGT.isNoCall()) && ! keepCompNoCall ) + continue; + + out.printf("%s\t%s\t%s\t", ref.getLocus().getContig(), ref.getLocus().getStart(), sample); + String evalGQ = evalGT == null ? "NA" : String.format("%d", (int)(10*evalGT.getNegLog10PError())); + + String LofDGivenAA = "-1", LofDGivenAB = "-1", LofDGivenBB = "-1"; + if ( evalGT != null ) { + double[] pls = evalGT.getLikelihoods().getAsVector(); + if ( pls != null ) { // not missing + LofDGivenAA = String.format("%.0f", pls[0]); + LofDGivenAB = String.format("%.0f", pls[1]); + LofDGivenBB = String.format("%.0f", pls[2]); + } + } + + String concordance = evalGT == null || compGT == null ? "NA" : String.format("%s", evalGT.getType() == compGT.getType()); + String concordanceInt = Integer.toString(evalGT == null || compGT == null ? -1 : (evalGT.getType() == compGT.getType() ? 1 : 0)); + String evalType = evalGT == null ? "MISSING" : evalGT.getType().toString(); + String compType = compGT == null ? "MISSING" : compGT.getType().toString(); + out.println(Utils.join("\t", Arrays.asList(evalGQ, LofDGivenAA, LofDGivenAB, LofDGivenBB, concordance, concordanceInt, evalType, compType))); + } + } + + return 1; + } + + private Genotype getGenotype(RefMetaDataTracker tracker, ReferenceContext ref, String sample, String rod) { + for ( VariantContext vc : tracker.getVariantContexts(ref, rod, null, ref.getLocus(), true, false) ) { + if ( vc.isNotFiltered() && vc.hasGenotype(sample) ) + return vc.getGenotype(sample); + else + return null; + } + + return null; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer counter, Integer sum) { + return counter + sum; + } + + @Override + public void onTraversalDone(Integer sum) {} +} diff --git a/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index cd8486eef..98f811229 100644 --- a/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -501,8 +501,7 @@ public abstract class AbstractReadBackedPileup0 ? (RBP)createNewPileup(loc,filteredTracker) : null; } else { UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();