diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java index c8d793ad1..52c58d99f 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -76,8 +76,8 @@ public abstract class GenotypeWriterStorage implements GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet()); } - public void addCall(VariantContext vc) { - writer.addCall(vc); + public void addCall(VariantContext vc, String refAllele) { + writer.addCall(vc,refAllele); } public void close() { diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java index 1c83aea95..8c170dcb4 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java @@ -129,8 +129,8 @@ public abstract class GenotypeWriterStub implements St /** * @{inheritDoc} */ - public void addCall(VariantContext vc) { - outputTracker.getStorage(this).addCall(vc); + public void addCall(VariantContext vc, String refAllele) { + outputTracker.getStorage(this).addCall(vc,refAllele); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java index c210d568d..c41f4c835 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java @@ -1,6 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; @@ -11,39 +13,103 @@ import java.util.*; public class SimpleIndelCalculationModel extends GenotypeCalculationModel { + private int MIN_COVERAGE = 6; + private double MIN_FRACTION = 0.3; + private double MIN_CONSENSUS_FRACTION = 0.7 ; // the previous normal event context - private Map cachedContext; +// private Map cachedContext; protected SimpleIndelCalculationModel() {} - private int testSkipCount = 5; + private int totalIndels = 0; + private int totalCoverage = 0; + private int bestIndelCount = 0; + private String bestEvent = null; + public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map contexts, DiploidGenotypePriors priors) { - cachedContext = contexts; +// cachedContext = contexts; return null; } public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char[] ref, GenomeLoc loc, Map contexts) { + totalIndels = 0; + totalCoverage = 0; + bestIndelCount = 0; + bestEvent = null; + /* System.out.println("\nReached " + loc + " through an extended event"); for (Map.Entry e : contexts.entrySet()) { System.out.println("Set "+e.getKey()); System.out.println(" Context: "+e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()+ " reads"); - System.out.println(" Cached context: "+ - cachedContext.get(e.getKey()).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()+ " reads"); - System.out.println(" First read in cached context: "+ - cachedContext.get(e.getKey()).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getReads().get(0).getReadName()); ReadBackedExtendedEventPileup p = e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup(); if ( p== null ) System.out.println("EXTENDED PILEUP IS NULL"); System.out.println(" Event(s): " + e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup().getEventStringsWithCounts(ref)); } - if ( testSkipCount==0 ) System.exit(1); - testSkipCount--; - // TODO -- implement me +*/ + initializeAlleles(ref, contexts); + VariantCallContext vcc = new VariantCallContext(false); + + if ( totalIndels == 0 ) return vcc; // this can happen if indel-containing reads get filtered out by the engine + + if ( totalCoverage < MIN_COVERAGE ) return vcc; + + if ( ((double)bestIndelCount)/totalCoverage < MIN_FRACTION ) return vcc; + + if ( ((double)bestIndelCount)/totalIndels < MIN_CONSENSUS_FRACTION ) return vcc; + + List alleles = new ArrayList(2); + + if ( bestEvent.charAt(0) == '+') { + alleles.add( new Allele(Allele.NULL_ALLELE_STRING,true) ); + alleles.add( new Allele(bestEvent.substring(1), false )); + } else { + if ( bestEvent.charAt(0) == '-' ) { + alleles.add( new Allele(Allele.NULL_ALLELE_STRING,false) ); + alleles.add( new Allele(bestEvent.substring(1), true )); + loc = GenomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-2); + } else + throw new StingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent); + } + + VariantContext vc = new VariantContext("UG_Indel_call", loc, alleles, new HashMap() /* genotypes */, + 0.0 /* log error */, null /* filters */, null /* attributes */); + + vcc = new VariantCallContext(vc,true); +/* + if ( totalIndels > 0 ) { + System.out.println("Calling: "+bestEvent+" ["+bestIndelCount+"/"+totalIndels+"/"+totalCoverage+"] at "+loc); + } else { + System.out.println("NO EVENT"); + } +*/ //VariantContext vc = new MutableVariantContext("UG_indel_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes); //return new VariantCallContext(vc, phredScaledConfidence >= CONFIDENCE_THRESHOLD); - return null; + return vcc; + } + + protected void initializeAlleles(char [] ref, Map contexts) { + + + for ( String sample : contexts.keySet() ) { + AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + + totalCoverage += context.size(); + + // calculate the sum of quality scores for each base + ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup(); + + List> all_events = pileup.getEventStringsWithCounts(ref); + for ( Pair p : all_events ) { + if ( p.second > bestIndelCount ) { + bestIndelCount = p.second; + bestEvent = p.first; + } + totalIndels += p.second; + } + } } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 2d9e1df41..94b74556e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -214,7 +214,7 @@ public class UnifiedGenotyper extends LocusWalker we're a ref site VariantCallContext(boolean confidentlyCalledP) { this.confidentlyCalled = confidentlyCalledP; } + + public void setRefAllele(String refAllele) { + this.refAllele = refAllele; + } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 4439b8f16..6290d6b4e 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -39,8 +39,10 @@ public interface GenotypeWriter { /** * Add a genotype, given a variant context * @param vc the variant context representing the call to add + * @param refAllele witers are allowed to ignore it; however it is required for VCF writers, as the + * VCF format explicitly requires (previous) ref base for an indel. Currently, refAllele is expected to hold a single character */ - public void addCall(VariantContext vc); + public void addCall(VariantContext vc, String refAllele); /** finish writing, closing any open files. */ public void close(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 71865be20..0a3c4f65c 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -107,8 +107,9 @@ public class GeliAdapter implements GeliGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add + * @param refAllele not used by this writer */ - public void addCall(VariantContext vc) { + public void addCall(VariantContext vc, String refAllele) { if ( writer == null ) throw new IllegalStateException("The Geli Header must be written before calls can be added"); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index d5c478abe..57f6b5e01 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -68,8 +68,9 @@ public class GeliTextWriter implements GeliGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add + * @param refAllele required by the inteface; not used by this writer. */ - public void addCall(VariantContext vc) { + public void addCall(VariantContext vc, String refAllele) { char ref = vc.getReference().toString().charAt(0); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 1b57353dc..3923860ea 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -148,8 +148,9 @@ public class GLFWriter implements GLFGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add + * @param refAllele not used by this writer */ - public void addCall(VariantContext vc) { + public void addCall(VariantContext vc, String refAllele) { if ( headerText == null ) throw new IllegalStateException("The GLF Header must be written before calls can be added"); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 4c8981fb6..d6909692e 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -78,12 +78,14 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add + * @param refAllele currently has to be a single character representing the reference base (the base + * immediately preceding the event in case of indels) */ - public void addCall(VariantContext vc) { + public void addCall(VariantContext vc, String refAllele) { if ( mHeader == null ) throw new IllegalStateException("The VCF Header must be written before records can be added"); - VCFRecord call = VariantContextAdaptors.toVCF(vc, vc.getReference().toString().charAt(0), allowedGenotypeFormatStrings, false, false); + VCFRecord call = VariantContextAdaptors.toVCF(vc, refAllele.charAt(0), allowedGenotypeFormatStrings, false, false); Set altAlleles = vc.getAlternateAlleles(); StringBuffer altAlleleCountString = new StringBuffer();