Because of the ugly VCF format, generic addCall() method of GenotypeWriter interface acquired an additional parameter, explicitly specified reference base (in VCF it's the base immediately *before* the event in case of indels, so we got to pass it). All implementing classes are modified to accomodate the change.

VCFGenotypeWriterAdapter now explicitly uses the passed reference base instead of deriving it from VatriantContext (in SNP mode as well!), other writers simply ignore that additional argument. 

SimpleIndelCalculationModel now WORKS (or rather, it does produce calls :) )

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3228 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
asivache 2010-04-21 18:19:03 +00:00
parent ab34397d2e
commit 1373fee278
11 changed files with 107 additions and 22 deletions

View File

@ -76,8 +76,8 @@ public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, new HashSet<VCFHeaderLine>());
}
public void addCall(VariantContext vc) {
writer.addCall(vc);
public void addCall(VariantContext vc, String refAllele) {
writer.addCall(vc,refAllele);
}
public void close() {

View File

@ -129,8 +129,8 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
/**
* @{inheritDoc}
*/
public void addCall(VariantContext vc) {
outputTracker.getStorage(this).addCall(vc);
public void addCall(VariantContext vc, String refAllele) {
outputTracker.getStorage(this).addCall(vc,refAllele);
}
/**

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
@ -11,39 +13,103 @@ import java.util.*;
public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
private int MIN_COVERAGE = 6;
private double MIN_FRACTION = 0.3;
private double MIN_CONSENSUS_FRACTION = 0.7 ;
// the previous normal event context
private Map<String, StratifiedAlignmentContext> cachedContext;
// private Map<String, StratifiedAlignmentContext> cachedContext;
protected SimpleIndelCalculationModel() {}
private int testSkipCount = 5;
private int totalIndels = 0;
private int totalCoverage = 0;
private int bestIndelCount = 0;
private String bestEvent = null;
public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
cachedContext = contexts;
// cachedContext = contexts;
return null;
}
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char[] ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts) {
totalIndels = 0;
totalCoverage = 0;
bestIndelCount = 0;
bestEvent = null;
/*
System.out.println("\nReached " + loc + " through an extended event");
for (Map.Entry<String,StratifiedAlignmentContext> e : contexts.entrySet()) {
System.out.println("Set "+e.getKey());
System.out.println(" Context: "+e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()+ " reads");
System.out.println(" Cached context: "+
cachedContext.get(e.getKey()).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()+ " reads");
System.out.println(" First read in cached context: "+
cachedContext.get(e.getKey()).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup().getReads().get(0).getReadName());
ReadBackedExtendedEventPileup p = e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup();
if ( p== null ) System.out.println("EXTENDED PILEUP IS NULL");
System.out.println(" Event(s): " + e.getValue().getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getExtendedEventPileup().getEventStringsWithCounts(ref));
}
if ( testSkipCount==0 ) System.exit(1);
testSkipCount--;
// TODO -- implement me
*/
initializeAlleles(ref, contexts);
VariantCallContext vcc = new VariantCallContext(false);
if ( totalIndels == 0 ) return vcc; // this can happen if indel-containing reads get filtered out by the engine
if ( totalCoverage < MIN_COVERAGE ) return vcc;
if ( ((double)bestIndelCount)/totalCoverage < MIN_FRACTION ) return vcc;
if ( ((double)bestIndelCount)/totalIndels < MIN_CONSENSUS_FRACTION ) return vcc;
List<Allele> alleles = new ArrayList<Allele>(2);
if ( bestEvent.charAt(0) == '+') {
alleles.add( new Allele(Allele.NULL_ALLELE_STRING,true) );
alleles.add( new Allele(bestEvent.substring(1), false ));
} else {
if ( bestEvent.charAt(0) == '-' ) {
alleles.add( new Allele(Allele.NULL_ALLELE_STRING,false) );
alleles.add( new Allele(bestEvent.substring(1), true ));
loc = GenomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-2);
} else
throw new StingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent);
}
VariantContext vc = new VariantContext("UG_Indel_call", loc, alleles, new HashMap<String,Genotype>() /* genotypes */,
0.0 /* log error */, null /* filters */, null /* attributes */);
vcc = new VariantCallContext(vc,true);
/*
if ( totalIndels > 0 ) {
System.out.println("Calling: "+bestEvent+" ["+bestIndelCount+"/"+totalIndels+"/"+totalCoverage+"] at "+loc);
} else {
System.out.println("NO EVENT");
}
*/
//VariantContext vc = new MutableVariantContext("UG_indel_call", loc, alleles, genotypes, phredScaledConfidence/10.0, null, attributes);
//return new VariantCallContext(vc, phredScaledConfidence >= CONFIDENCE_THRESHOLD);
return null;
return vcc;
}
protected void initializeAlleles(char [] ref, Map<String, StratifiedAlignmentContext> contexts) {
for ( String sample : contexts.keySet() ) {
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
totalCoverage += context.size();
// calculate the sum of quality scores for each base
ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup();
List<Pair<String,Integer>> all_events = pileup.getEventStringsWithCounts(ref);
for ( Pair<String,Integer> p : all_events ) {
if ( p.second > bestIndelCount ) {
bestIndelCount = p.second;
bestEvent = p.first;
}
totalIndels += p.second;
}
}
}
}

View File

@ -214,7 +214,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
return sum;
try {
writer.addCall(value.vc);
writer.addCall(value.vc, value.refAllele);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
}

View File

@ -230,6 +230,7 @@ public class UnifiedGenotyperEngine {
}
}
if ( call != null ) call.setRefAllele(Character.toString(ref));
return call;
}

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
*/
public class VariantCallContext {
public VariantContext vc = null;
public String refAllele = null;
// Was the site called confidently, either reference or variant?
public boolean confidentlyCalled = false;
@ -21,8 +22,18 @@ public class VariantCallContext {
this.confidentlyCalled = confidentlyCalledP;
}
VariantCallContext(VariantContext vc, String refAllele, boolean confidentlyCalledP) {
this.vc = vc;
this.refAllele = refAllele;
this.confidentlyCalled = confidentlyCalledP;
}
// blank variant context => we're a ref site
VariantCallContext(boolean confidentlyCalledP) {
this.confidentlyCalled = confidentlyCalledP;
}
public void setRefAllele(String refAllele) {
this.refAllele = refAllele;
}
}

View File

@ -39,8 +39,10 @@ public interface GenotypeWriter {
/**
* Add a genotype, given a variant context
* @param vc the variant context representing the call to add
* @param refAllele witers are allowed to ignore it; however it is required for VCF writers, as the
* VCF format explicitly requires (previous) ref base for an indel. Currently, refAllele is expected to hold a single character
*/
public void addCall(VariantContext vc);
public void addCall(VariantContext vc, String refAllele);
/** finish writing, closing any open files. */
public void close();

View File

@ -107,8 +107,9 @@ public class GeliAdapter implements GeliGenotypeWriter {
* Add a genotype, given a variant context
*
* @param vc the variant context representing the call to add
* @param refAllele not used by this writer
*/
public void addCall(VariantContext vc) {
public void addCall(VariantContext vc, String refAllele) {
if ( writer == null )
throw new IllegalStateException("The Geli Header must be written before calls can be added");

View File

@ -68,8 +68,9 @@ public class GeliTextWriter implements GeliGenotypeWriter {
* Add a genotype, given a variant context
*
* @param vc the variant context representing the call to add
* @param refAllele required by the inteface; not used by this writer.
*/
public void addCall(VariantContext vc) {
public void addCall(VariantContext vc, String refAllele) {
char ref = vc.getReference().toString().charAt(0);

View File

@ -148,8 +148,9 @@ public class GLFWriter implements GLFGenotypeWriter {
* Add a genotype, given a variant context
*
* @param vc the variant context representing the call to add
* @param refAllele not used by this writer
*/
public void addCall(VariantContext vc) {
public void addCall(VariantContext vc, String refAllele) {
if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before calls can be added");

View File

@ -78,12 +78,14 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
* Add a genotype, given a variant context
*
* @param vc the variant context representing the call to add
* @param refAllele currently has to be a single character representing the reference base (the base
* immediately preceding the event in case of indels)
*/
public void addCall(VariantContext vc) {
public void addCall(VariantContext vc, String refAllele) {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added");
VCFRecord call = VariantContextAdaptors.toVCF(vc, vc.getReference().toString().charAt(0), allowedGenotypeFormatStrings, false, false);
VCFRecord call = VariantContextAdaptors.toVCF(vc, refAllele.charAt(0), allowedGenotypeFormatStrings, false, false);
Set<Allele> altAlleles = vc.getAlternateAlleles();
StringBuffer altAlleleCountString = new StringBuffer();