Because of the ugly VCF format, generic addCall() method of GenotypeWriter interface acquired an additional parameter, explicitly specified reference base (in VCF it's the base immediately *before* the event in case of indels, so we got to pass it). All implementing classes are modified to accomodate the change.

VCFGenotypeWriterAdapter now explicitly uses the passed reference base instead of deriving it from VatriantContext (in SNP mode as well!), other writers simply ignore that additional argument. 

SimpleIndelCalculationModel now WORKS (or rather, it does produce calls :) )

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3228 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2010-04-21 18:19:03 +00:00
parent ab34397d2e
commit 1373fee278
11 changed files with 107 additions and 22 deletions

View File

@ -76,8 +76,8 @@ public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>()); GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
} }
public void addCall(VariantContext vc) { public void addCall(VariantContext vc, String refAllele) {
writer.addCall(vc); writer.addCall(vc,refAllele);
} }
public void close() { public void close() {

View File

@ -129,8 +129,8 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
/** /**
* @{inheritDoc} * @{inheritDoc}
*/ */
public void addCall(VariantContext vc) { public void addCall(VariantContext vc, String refAllele) {
outputTracker.getStorage(this).addCall(vc); outputTracker.getStorage(this).addCall(vc,refAllele);
} }
/** /**

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
@ -11,39 +13,103 @@ import java.util.*;
public class SimpleIndelCalculationModel extends GenotypeCalculationModel { public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
private int MIN_COVERAGE = 6;
private double MIN_FRACTION = 0.3;
private double MIN_CONSENSUS_FRACTION = 0.7 ;
// the previous normal event context // the previous normal event context
private Map<String, StratifiedAlignmentContext> cachedContext; // private Map<String, StratifiedAlignmentContext> cachedContext;
protected SimpleIndelCalculationModel() {} protected SimpleIndelCalculationModel() {}
private int testSkipCount = 5; private int totalIndels = 0;
private int totalCoverage = 0;
private int bestIndelCount = 0;
private String bestEvent = null;
public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) { public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
cachedContext = contexts; // cachedContext = contexts;
return null; return null;
} }
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char[] ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts) { public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char[] ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts) {
totalIndels = 0;
totalCoverage = 0;
bestIndelCount = 0;
bestEvent = null;
/*
System.out.println("\nReached " + loc + " through an extended event"); System.out.println("\nReached " + loc + " through an extended event");
for (Map.Entry<String,StratifiedAlignmentContext> e : contexts.entrySet()) { for (Map.Entry<String,StratifiedAlignmentContext> e : contexts.entrySet()) {
System.out.println("Set "+e.getKey()); System.out.println("Set "+e.getKey());
System.out.println(" Context: "+e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()+ " reads"); System.out.println(" Context: "+e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()+ " reads");
System.out.println(" Cached context: "+
cachedContext.get(e.getKey()).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()+ " reads");
System.out.println(" First read in cached context: "+
cachedContext.get(e.getKey()).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getReads().get(0).getReadName());
ReadBackedExtendedEventPileup p = e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup(); ReadBackedExtendedEventPileup p = e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup();
if ( p== null ) System.out.println("EXTENDED PILEUP IS NULL"); if ( p== null ) System.out.println("EXTENDED PILEUP IS NULL");
System.out.println(" Event(s): " + e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup().getEventStringsWithCounts(ref)); System.out.println(" Event(s): " + e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup().getEventStringsWithCounts(ref));
} }
if ( testSkipCount==0 ) System.exit(1); */
testSkipCount--; initializeAlleles(ref, contexts);
// TODO -- implement me
VariantCallContext vcc = new VariantCallContext(false);
if ( totalIndels == 0 ) return vcc; // this can happen if indel-containing reads get filtered out by the engine
if ( totalCoverage < MIN_COVERAGE ) return vcc;
if ( ((double)bestIndelCount)/totalCoverage < MIN_FRACTION ) return vcc;
if ( ((double)bestIndelCount)/totalIndels < MIN_CONSENSUS_FRACTION ) return vcc;
List<Allele> alleles = new ArrayList<Allele>(2);
if ( bestEvent.charAt(0) == '+') {
alleles.add( new Allele(Allele.NULL_ALLELE_STRING,true) );
alleles.add( new Allele(bestEvent.substring(1), false ));
} else {
if ( bestEvent.charAt(0) == '-' ) {
alleles.add( new Allele(Allele.NULL_ALLELE_STRING,false) );
alleles.add( new Allele(bestEvent.substring(1), true ));
loc = GenomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-2);
} else
throw new StingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent);
}
VariantContext vc = new VariantContext("UG_Indel_call", loc, alleles, new HashMap<String,Genotype>() /* genotypes */,
0.0 /* log error */, null /* filters */, null /* attributes */);
vcc = new VariantCallContext(vc,true);
/*
if ( totalIndels > 0 ) {
System.out.println("Calling: "+bestEvent+" ["+bestIndelCount+"/"+totalIndels+"/"+totalCoverage+"] at "+loc);
} else {
System.out.println("NO EVENT");
}
*/
//VariantContext vc = new MutableVariantContext("UG_indel_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes); //VariantContext vc = new MutableVariantContext("UG_indel_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes);
//return new VariantCallContext(vc, phredScaledConfidence >= CONFIDENCE_THRESHOLD); //return new VariantCallContext(vc, phredScaledConfidence >= CONFIDENCE_THRESHOLD);
return null; return vcc;
}
protected void initializeAlleles(char [] ref, Map<String, StratifiedAlignmentContext> contexts) {
for ( String sample : contexts.keySet() ) {
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
totalCoverage += context.size();
// calculate the sum of quality scores for each base
ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup();
List<Pair<String,Integer>> all_events = pileup.getEventStringsWithCounts(ref);
for ( Pair<String,Integer> p : all_events ) {
if ( p.second > bestIndelCount ) {
bestIndelCount = p.second;
bestEvent = p.first;
}
totalIndels += p.second;
}
}
} }
} }

View File

@ -214,7 +214,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
return sum; return sum;
try { try {
writer.addCall(value.vc); writer.addCall(value.vc, value.refAllele);
} catch (IllegalArgumentException e) { } catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name"); throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
} }

View File

@ -230,6 +230,7 @@ public class UnifiedGenotyperEngine {
} }
} }
if ( call != null ) call.setRefAllele(Character.toString(ref));
return call; return call;
} }

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
*/ */
public class VariantCallContext { public class VariantCallContext {
public VariantContext vc = null; public VariantContext vc = null;
public String refAllele = null;
// Was the site called confidently, either reference or variant? // Was the site called confidently, either reference or variant?
public boolean confidentlyCalled = false; public boolean confidentlyCalled = false;
@ -21,8 +22,18 @@ public class VariantCallContext {
this.confidentlyCalled = confidentlyCalledP; this.confidentlyCalled = confidentlyCalledP;
} }
VariantCallContext(VariantContext vc, String refAllele, boolean confidentlyCalledP) {
this.vc = vc;
this.refAllele = refAllele;
this.confidentlyCalled = confidentlyCalledP;
}
// blank variant context => we're a ref site // blank variant context => we're a ref site
VariantCallContext(boolean confidentlyCalledP) { VariantCallContext(boolean confidentlyCalledP) {
this.confidentlyCalled = confidentlyCalledP; this.confidentlyCalled = confidentlyCalledP;
} }
public void setRefAllele(String refAllele) {
this.refAllele = refAllele;
}
} }

View File

@ -39,8 +39,10 @@ public interface GenotypeWriter {
/** /**
* Add a genotype, given a variant context * Add a genotype, given a variant context
* @param vc the variant context representing the call to add * @param vc the variant context representing the call to add
* @param refAllele witers are allowed to ignore it; however it is required for VCF writers, as the
* VCF format explicitly requires (previous) ref base for an indel. Currently, refAllele is expected to hold a single character
*/ */
public void addCall(VariantContext vc); public void addCall(VariantContext vc, String refAllele);
/** finish writing, closing any open files. */ /** finish writing, closing any open files. */
public void close(); public void close();

View File

@ -107,8 +107,9 @@ public class GeliAdapter implements GeliGenotypeWriter {
* Add a genotype, given a variant context * Add a genotype, given a variant context
* *
* @param vc the variant context representing the call to add * @param vc the variant context representing the call to add
* @param refAllele not used by this writer
*/ */
public void addCall(VariantContext vc) { public void addCall(VariantContext vc, String refAllele) {
if ( writer == null ) if ( writer == null )
throw new IllegalStateException("The Geli Header must be written before calls can be added"); throw new IllegalStateException("The Geli Header must be written before calls can be added");

View File

@ -68,8 +68,9 @@ public class GeliTextWriter implements GeliGenotypeWriter {
* Add a genotype, given a variant context * Add a genotype, given a variant context
* *
* @param vc the variant context representing the call to add * @param vc the variant context representing the call to add
* @param refAllele required by the inteface; not used by this writer.
*/ */
public void addCall(VariantContext vc) { public void addCall(VariantContext vc, String refAllele) {
char ref = vc.getReference().toString().charAt(0); char ref = vc.getReference().toString().charAt(0);

View File

@ -148,8 +148,9 @@ public class GLFWriter implements GLFGenotypeWriter {
* Add a genotype, given a variant context * Add a genotype, given a variant context
* *
* @param vc the variant context representing the call to add * @param vc the variant context representing the call to add
* @param refAllele not used by this writer
*/ */
public void addCall(VariantContext vc) { public void addCall(VariantContext vc, String refAllele) {
if ( headerText == null ) if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before calls can be added"); throw new IllegalStateException("The GLF Header must be written before calls can be added");

View File

@ -78,12 +78,14 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
* Add a genotype, given a variant context * Add a genotype, given a variant context
* *
* @param vc the variant context representing the call to add * @param vc the variant context representing the call to add
* @param refAllele currently has to be a single character representing the reference base (the base
* immediately preceding the event in case of indels)
*/ */
public void addCall(VariantContext vc) { public void addCall(VariantContext vc, String refAllele) {
if ( mHeader == null ) if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added"); throw new IllegalStateException("The VCF Header must be written before records can be added");
VCFRecord call = VariantContextAdaptors.toVCF(vc, vc.getReference().toString().charAt(0), allowedGenotypeFormatStrings, false, false); VCFRecord call = VariantContextAdaptors.toVCF(vc, refAllele.charAt(0), allowedGenotypeFormatStrings, false, false);
Set<Allele> altAlleles = vc.getAlternateAlleles(); Set<Allele> altAlleles = vc.getAlternateAlleles();
StringBuffer altAlleleCountString = new StringBuffer(); StringBuffer altAlleleCountString = new StringBuffer();