From 1373fee27811accf27add9b494eef53d84c3d0f6 Mon Sep 17 00:00:00 2001 From: asivache Date: Wed, 21 Apr 2010 18:19:03 +0000 Subject: [PATCH] Because of the ugly VCF format, generic addCall() method of GenotypeWriter interface acquired an additional parameter, explicitly specified reference base (in VCF it's the base immediately *before* the event in case of indels, so we got to pass it). All implementing classes are modified to accomodate the change. VCFGenotypeWriterAdapter now explicitly uses the passed reference base instead of deriving it from VatriantContext (in SNP mode as well!), other writers simply ignore that additional argument. SimpleIndelCalculationModel now WORKS (or rather, it does produce calls :) ) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3228 348d0f76-0448-11de-a6fe-93d51630548a --- .../io/storage/GenotypeWriterStorage.java | 4 +- .../gatk/io/stubs/GenotypeWriterStub.java | 4 +- .../SimpleIndelCalculationModel.java | 88 ++++++++++++++++--- .../walkers/genotyper/UnifiedGenotyper.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 1 + .../walkers/genotyper/VariantCallContext.java | 11 +++ .../sting/utils/genotype/GenotypeWriter.java | 4 +- .../utils/genotype/geli/GeliAdapter.java | 3 +- .../utils/genotype/geli/GeliTextWriter.java | 3 +- .../sting/utils/genotype/glf/GLFWriter.java | 3 +- .../vcf/VCFGenotypeWriterAdapter.java | 6 +- 11 files changed, 107 insertions(+), 22 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java index c8d793ad1..52c58d99f 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -76,8 +76,8 @@ public abstract class GenotypeWriterStorage implements GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet()); } - public void addCall(VariantContext vc) { - writer.addCall(vc); + public void addCall(VariantContext vc, String refAllele) { + writer.addCall(vc,refAllele); } public void close() { diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java index 1c83aea95..8c170dcb4 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java @@ -129,8 +129,8 @@ public abstract class GenotypeWriterStub implements St /** * @{inheritDoc} */ - public void addCall(VariantContext vc) { - outputTracker.getStorage(this).addCall(vc); + public void addCall(VariantContext vc, String refAllele) { + outputTracker.getStorage(this).addCall(vc,refAllele); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java index c210d568d..c41f4c835 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; @@ -11,39 +13,103 @@ import java.util.*; public class SimpleIndelCalculationModel extends GenotypeCalculationModel { + private int MIN_COVERAGE = 6; + private double MIN_FRACTION = 0.3; + private double MIN_CONSENSUS_FRACTION = 0.7 ; // the previous normal event context - private Map cachedContext; +// private Map cachedContext; protected SimpleIndelCalculationModel() {} - private int testSkipCount = 5; + private int totalIndels = 0; + private int totalCoverage = 0; + private int bestIndelCount = 0; + private String bestEvent = null; + public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map contexts, DiploidGenotypePriors priors) { - cachedContext = contexts; +// cachedContext = contexts; return null; } public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char[] ref, GenomeLoc loc, Map contexts) { + totalIndels = 0; + totalCoverage = 0; + bestIndelCount = 0; + bestEvent = null; + /* System.out.println("\nReached " + loc + " through an extended event"); for (Map.Entry e : contexts.entrySet()) { System.out.println("Set "+e.getKey()); System.out.println(" Context: "+e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()+ " reads"); - System.out.println(" Cached context: "+ - cachedContext.get(e.getKey()).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()+ " reads"); - System.out.println(" First read in cached context: "+ - cachedContext.get(e.getKey()).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getReads().get(0).getReadName()); ReadBackedExtendedEventPileup p = e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup(); if ( p== null ) System.out.println("EXTENDED PILEUP IS NULL"); System.out.println(" Event(s): " + e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup().getEventStringsWithCounts(ref)); } - if ( testSkipCount==0 ) System.exit(1); - testSkipCount--; - // TODO -- implement me +*/ + initializeAlleles(ref, contexts); + VariantCallContext vcc = new VariantCallContext(false); + + if ( totalIndels == 0 ) return vcc; // this can happen if indel-containing reads get filtered out by the engine + + if ( totalCoverage < MIN_COVERAGE ) return vcc; + + if ( ((double)bestIndelCount)/totalCoverage < MIN_FRACTION ) return vcc; + + if ( ((double)bestIndelCount)/totalIndels < MIN_CONSENSUS_FRACTION ) return vcc; + + List alleles = new ArrayList(2); + + if ( bestEvent.charAt(0) == '+') { + alleles.add( new Allele(Allele.NULL_ALLELE_STRING,true) ); + alleles.add( new Allele(bestEvent.substring(1), false )); + } else { + if ( bestEvent.charAt(0) == '-' ) { + alleles.add( new Allele(Allele.NULL_ALLELE_STRING,false) ); + alleles.add( new Allele(bestEvent.substring(1), true )); + loc = GenomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-2); + } else + throw new StingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent); + } + + VariantContext vc = new VariantContext("UG_Indel_call", loc, alleles, new HashMap() /* genotypes */, + 0.0 /* log error */, null /* filters */, null /* attributes */); + + vcc = new VariantCallContext(vc,true); +/* + if ( totalIndels > 0 ) { + System.out.println("Calling: "+bestEvent+" ["+bestIndelCount+"/"+totalIndels+"/"+totalCoverage+"] at "+loc); + } else { + System.out.println("NO EVENT"); + } +*/ //VariantContext vc = new MutableVariantContext("UG_indel_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes); //return new VariantCallContext(vc, phredScaledConfidence >= CONFIDENCE_THRESHOLD); - return null; + return vcc; + } + + protected void initializeAlleles(char [] ref, Map contexts) { + + + for ( String sample : contexts.keySet() ) { + AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + + totalCoverage += context.size(); + + // calculate the sum of quality scores for each base + ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup(); + + List> all_events = pileup.getEventStringsWithCounts(ref); + for ( Pair p : all_events ) { + if ( p.second > bestIndelCount ) { + bestIndelCount = p.second; + bestEvent = p.first; + } + totalIndels += p.second; + } + } } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 2d9e1df41..94b74556e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -214,7 +214,7 @@ public class UnifiedGenotyper extends LocusWalker we're a ref site VariantCallContext(boolean confidentlyCalledP) { this.confidentlyCalled = confidentlyCalledP; } + + public void setRefAllele(String refAllele) { + this.refAllele = refAllele; + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 4439b8f16..6290d6b4e 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -39,8 +39,10 @@ public interface GenotypeWriter { /** * Add a genotype, given a variant context * @param vc the variant context representing the call to add + * @param refAllele witers are allowed to ignore it; however it is required for VCF writers, as the + * VCF format explicitly requires (previous) ref base for an indel. Currently, refAllele is expected to hold a single character */ - public void addCall(VariantContext vc); + public void addCall(VariantContext vc, String refAllele); /** finish writing, closing any open files. */ public void close(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 71865be20..0a3c4f65c 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -107,8 +107,9 @@ public class GeliAdapter implements GeliGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add + * @param refAllele not used by this writer */ - public void addCall(VariantContext vc) { + public void addCall(VariantContext vc, String refAllele) { if ( writer == null ) throw new IllegalStateException("The Geli Header must be written before calls can be added"); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index d5c478abe..57f6b5e01 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -68,8 +68,9 @@ public class GeliTextWriter implements GeliGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add + * @param refAllele required by the inteface; not used by this writer. */ - public void addCall(VariantContext vc) { + public void addCall(VariantContext vc, String refAllele) { char ref = vc.getReference().toString().charAt(0); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 1b57353dc..3923860ea 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -148,8 +148,9 @@ public class GLFWriter implements GLFGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add + * @param refAllele not used by this writer */ - public void addCall(VariantContext vc) { + public void addCall(VariantContext vc, String refAllele) { if ( headerText == null ) throw new IllegalStateException("The GLF Header must be written before calls can be added"); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 4c8981fb6..d6909692e 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -78,12 +78,14 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add + * @param refAllele currently has to be a single character representing the reference base (the base + * immediately preceding the event in case of indels) */ - public void addCall(VariantContext vc) { + public void addCall(VariantContext vc, String refAllele) { if ( mHeader == null ) throw new IllegalStateException("The VCF Header must be written before records can be added"); - VCFRecord call = VariantContextAdaptors.toVCF(vc, vc.getReference().toString().charAt(0), allowedGenotypeFormatStrings, false, false); + VCFRecord call = VariantContextAdaptors.toVCF(vc, refAllele.charAt(0), allowedGenotypeFormatStrings, false, false); Set altAlleles = vc.getAlternateAlleles(); StringBuffer altAlleleCountString = new StringBuffer();