Stage 1 of massive Variation/Genotype refactoring.

This stage consists only of the code originating in the Genotyper and flowing through to the genotype writers.  I haven't finished refactoring the writers and haven't even touched the readers at all.

The major changes here are that
1. Variations which are BackedByGenotypes are now correctly associated with those Genotypes
2. Genotypes which have an associated Variation can actually be associated with it (and then return it when toVariation() is called).

The only integration tests which need to be updated are MSG-related (because the refactoring now made it easy for me to prevent MSG from emitting tri-allelic sites).



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2269 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-06 03:12:41 +00:00
parent b817db0962
commit 08f2214f14
38 changed files with 475 additions and 175 deletions

View File

@ -11,9 +11,12 @@ import java.util.ArrayList;
public class AlleleBalance extends StandardVariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
if ( genotypes.size() == 0 )
if ( !(variation instanceof VariantBackedByGenotype) )
return null;
final List<Genotype> genotypes = ((VariantBackedByGenotype)variation).getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
double ratio;

View File

@ -2,15 +2,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
public class DepthOfCoverage extends StandardVariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
int depth = pileup.getReads().size();
return String.format("%d", depth);
}

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import net.sf.samtools.SAMRecord;
import cern.jet.math.Arithmetic;
@ -13,7 +14,13 @@ import java.util.List;
public class FisherStrand implements VariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
if ( !(variation instanceof VariantBackedByGenotype) )
return null;
final List<Genotype> genotypes = ((VariantBackedByGenotype)variation).getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
// this test doesn't make sense for homs
Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes);

View File

@ -3,15 +3,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
public class HomopolymerRun extends StandardVariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
if ( !variation.isBiallelic() || !variation.isSNP() )
return null;

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import net.sf.samtools.SAMRecord;
@ -11,7 +10,7 @@ import java.util.List;
public class MappingQualityZero extends StandardVariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
List<SAMRecord> reads = pileup.getReads();
int MQ0Count = 0;
for (int i=0; i < reads.size(); i++) {

View File

@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import net.sf.samtools.SAMRecord;
@ -12,7 +11,7 @@ import java.util.List;
public class RMSMappingQuality extends StandardVariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
List<SAMRecord> reads = pileup.getReads();
int[] qualities = new int[reads.size()];
for (int i=0; i < reads.size(); i++)

View File

@ -14,9 +14,12 @@ public class RankSumTest implements VariantAnnotation {
private final static boolean DEBUG = false;
private static final double minPValue = 1e-10;
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
if ( genotypes.size() == 0 )
if ( !(variation instanceof VariantBackedByGenotype) )
return null;
final List<Genotype> genotypes = ((VariantBackedByGenotype)variation).getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
// this test doesn't make sense for homs

View File

@ -4,11 +4,9 @@ import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -30,7 +28,7 @@ public class SecondBaseSkew implements VariantAnnotation {
public String getDescription() { return KEY_NAME + ",1,Float,\"Chi-square Secondary Base Skew\""; }
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
if ( variation.isSNP() && variation.isBiallelic() ) {
char snp = variation.getAlternativeBaseForSNP();
Pair<Integer,Double> depthProp = getSecondaryPileupNonrefEstimator(ref.getBase(), pileup, snp);

View File

@ -2,15 +2,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
public class SpanningDeletions extends StandardVariantAnnotation {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes) {
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) {
int deletions = pileup.getNumberOfDeletions();
return String.format("%.2f", (double)deletions/(double)pileup.size());
}

View File

@ -2,15 +2,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
public interface VariantAnnotation {
// return the annotation for the given locus data (return null for no annotation)
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List<Genotype> genotypes);
public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation);
// return true if you want to use a context with mapping quality zero reads, false otherwise
public boolean useZeroQualityReads();

View File

@ -164,12 +164,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
// if the reference base is not ambiguous, the variant is a SNP, and it's the appropriate type, we can annotate
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 &&
variant.isBiallelic() &&
variant.isSNP() &&
variant instanceof VariantBackedByGenotype ) {
final List<org.broadinstitute.sting.utils.genotype.Genotype> genotypes = ((VariantBackedByGenotype)variant).getGenotypes();
if ( genotypes != null )
annotations = getAnnotations(ref, context, variant, genotypes, requestedAnnotations);
}
variant.isSNP() )
annotations = getAnnotations(ref, context, variant, requestedAnnotations);
writeVCF(tracker, ref, context, variant, annotations);
@ -177,30 +173,30 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
}
// option #1: don't specify annotations to be used: standard annotations are used by default
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List<Genotype> genotypes) {
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation) {
if ( standardAnnotations == null )
determineAllAnnotations();
return getAnnotations(ref, context, variation, genotypes, standardAnnotations.values());
return getAnnotations(ref, context, variation, standardAnnotations.values());
}
// option #2: specify that all possible annotations be used
public static Map<String, String> getAllAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List<Genotype> genotypes) {
public static Map<String, String> getAllAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation) {
if ( allAnnotations == null )
determineAllAnnotations();
return getAnnotations(ref, context, variation, genotypes, allAnnotations.values());
return getAnnotations(ref, context, variation, allAnnotations.values());
}
// option #3: specify the exact annotations to be used
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List<Genotype> genotypes, Collection<VariantAnnotation> annotations) {
public static Map<String, String> getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, Collection<VariantAnnotation> annotations) {
HashMap<String, String> results = new HashMap<String, String>();
// set up the pileup for the full collection of reads at this position
ReadBackedPileup fullPileup = context.getPileup();
ReadBackedPileup MQ0freePileup = fullPileup.getPileupWithoutMappingQualityZeroReads();
HashMap<String, String> results = new HashMap<String, String>();
for ( VariantAnnotation annotator : annotations) {
String annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), variation, genotypes);
String annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), variation);
if ( annot != null ) {
results.put(annotator.getKeyName(), annot);
}

View File

@ -79,7 +79,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
for ( String sample : GLs.keySet() ) {
// create the call
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc);
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc);
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();

View File

@ -19,7 +19,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
protected EMGenotypeCalculationModel() {}
public Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// keep track of the context for each sample, overall and separated by strand
HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
@ -45,11 +45,13 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
// are we above the lod threshold for emitting calls (and not in all-bases mode)?
if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) {
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
return new Pair<VariationCall, List<Genotype>>(null, null);
}
// generate the calls
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP);
List<Genotype> calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts);
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation(), bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP);
if ( locusdata != null ) {
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
@ -79,8 +81,13 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
((SLODBacked)locusdata).setSLOD(strandScore);
}
locusdata.setNonRefAlleleFrequency(overall.getMAF());
// finally, associate the Variation with the Genotypes
locusdata.setGenotypeCalls(calls);
for ( Genotype call : calls )
((GenotypeCall)call).setVariation(locusdata);
}
return new Pair<List<Genotype>, GenotypeLocusData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), locusdata);
return new Pair<VariationCall, List<Genotype>>(locusdata, calls);
}
protected List<Genotype> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap<String, AlignmentContextBySample> contexts) {
@ -93,7 +100,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
// create the call
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup();

View File

@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.apache.log4j.Logger;
import java.io.*;
@ -102,10 +104,10 @@ public abstract class GenotypeCalculationModel implements Cloneable {
*
* @return calls
*/
public abstract Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker,
char ref,
AlignmentContext context,
DiploidGenotypePriors priors);
public abstract Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker,
char ref,
AlignmentContext context,
DiploidGenotypePriors priors);
/**
* @param tracker rod data
@ -137,15 +139,11 @@ public abstract class GenotypeCalculationModel implements Cloneable {
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
int deletionsInPileup = 0;
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
for (int i = 0; i < reads.size(); i++) {
ReadBackedPileup pileup = context.getPileup();
for (PileupElement p : pileup ) {
// get the read and offset
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
SAMRecord read = p.getRead();
// find the sample
String sample;
@ -165,19 +163,15 @@ public abstract class GenotypeCalculationModel implements Cloneable {
contexts.put(sample, myContext);
}
// check for deletions
if ( offset == -1 )
deletionsInPileup++;
// add the read to this sample's context
// note that bad bases are added to the context (for DoC calculations later)
myContext.add(read, offset);
myContext.add(read, p.getOffset());
}
// are there too many deletions in the pileup?
if ( maxDeletionFractionInPileup != -1.0 &&
(double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup )
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > maxDeletionFractionInPileup )
return null;
return contexts;
@ -221,19 +215,19 @@ public abstract class GenotypeCalculationModel implements Cloneable {
private AlignmentContext getOverallContext() {
if ( overall == null )
overall = new AlignmentContext(loc, allReads, allOffsets);
overall = new AlignmentContext(loc, new ReadBackedPileup(loc, allReads, allOffsets));
return overall;
}
private AlignmentContext getForwardContext() {
if ( forward == null )
forward = new AlignmentContext(loc, forwardReads, forwardOffsets);
forward = new AlignmentContext(loc, new ReadBackedPileup(loc, forwardReads, forwardOffsets));
return forward;
}
private AlignmentContext getReverseContext() {
if ( reverse == null )
reverse = new AlignmentContext(loc, reverseReads, reverseOffsets);
reverse = new AlignmentContext(loc, new ReadBackedPileup(loc, reverseReads, reverseOffsets));
return reverse;
}

View File

@ -35,7 +35,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
protected JointEstimateGenotypeCalculationModel() {}
public Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// keep track of the context for each sample, overall and separated by strand
HashMap<String, AlignmentContextBySample> contexts = createContexts(context);
@ -49,7 +49,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
initializeBestAlternateAllele(ref, context);
// if there are no non-ref bases and we don't want all bases, then we can just return
if ( !ALL_BASE_MODE && bestAlternateAllele == null )
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
return new Pair<VariationCall, List<Genotype>>(null, null);
initializeAlleleFrequencies(frequencyEstimationPoints);
@ -302,7 +302,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
return new ArrayList<Genotype>();
}
protected Pair<List<Genotype>, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc, int frequencyEstimationPoints) {
protected Pair<VariationCall, List<Genotype>> createCalls(RefMetaDataTracker tracker, char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc, int frequencyEstimationPoints) {
// only need to look at the most likely alternate allele
int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele);
@ -313,14 +313,14 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero
if ( !ALL_BASE_MODE && (bestAFguess == 0 || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
return new Pair<VariationCall, List<Genotype>>(null, null);
// populate the sample-specific data
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, contexts, loc);
// next, the general locus data
// note that calculating strand bias involves overwriting data structures, so we do that last
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc, VARIANT_TYPE.SNP);
// *** note that calculating strand bias involves overwriting data structures, so we do that last
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP);
if ( locusdata != null ) {
locusdata.addAlternateAllele(bestAlternateAllele.toString());
locusdata.setNonRefAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
@ -373,6 +373,13 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
}
}
return new Pair<List<Genotype>, GenotypeLocusData>(calls, locusdata);
// finally, associate the Variation with the Genotypes (if available)
if ( locusdata != null ) {
locusdata.setGenotypeCalls(calls);
for ( Genotype call : calls )
((GenotypeCall)call).setVariation(locusdata);
}
return new Pair<VariationCall, List<Genotype>>(locusdata, calls);
}
}

View File

@ -26,7 +26,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
// overload this method so we can special-case the single sample
public Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// we don't actually want to run EM for single samples
if ( samples.size() == 1 ) {
@ -45,7 +45,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
return null;
// get the genotype likelihoods
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL);
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedContext.OVERALL);
// find the index of the best genotype
double[] posteriors = discoveryGL.second.getNormalizedPosteriors();
@ -68,11 +68,12 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
// are we above the lod threshold for emitting calls (and not in all-bases mode)?
if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) {
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
return new Pair<VariationCall, List<Genotype>>(null, null);
}
// we can now create the genotype call object
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation());
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
call.setVariation(null);
if ( call instanceof ReadBacked ) {
((ReadBacked)call).setPileup(discoveryGL.first);
@ -87,7 +88,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors());
}
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP);
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation(), bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP);
if ( locusdata != null ) {
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
@ -99,13 +100,13 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
}
}
return new Pair<List<Genotype>, GenotypeLocusData>(Arrays.asList(call), locusdata);
return new Pair<VariationCall, List<Genotype>>(locusdata, Arrays.asList((Genotype)call));
}
return super.calculateGenotype(tracker, ref, context, priors);
}
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(char ref, AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) {
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) {
// create the pileup
AlignmentContext myContext = sampleContext.getContext(contextType);
ReadBackedPileup pileup = myContext.getPileup();

View File

@ -44,7 +44,7 @@ import java.util.*;
@Reference(window=@Window(start=-20,stop=20))
public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeLocusData>, Integer> {
public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genotype>>, Integer> {
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
@ -154,7 +154,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
* @param refContext the reference base
* @param fullContext contextual information around the locus
*/
public Pair<List<Genotype>, GenotypeLocusData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext fullContext) {
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext fullContext) {
char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) )
return null;
@ -168,16 +168,16 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
Pair<List<Genotype>, GenotypeLocusData> call = gcm.calculateGenotype(tracker, ref, MQ0freeContext, priors);
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, MQ0freeContext, priors);
// annotate the call, if possible
if ( call != null && call.second != null && call.second instanceof ArbitraryFieldsBacked ) {
if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) {
Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(refContext, fullContext, call.second, call.first);
annotations = VariantAnnotator.getAllAnnotations(refContext, fullContext, call.first);
else
annotations = VariantAnnotator.getAnnotations(refContext, fullContext, call.second, call.first);
((ArbitraryFieldsBacked)call.second).setFields(annotations);
annotations = VariantAnnotator.getAnnotations(refContext, fullContext, call.first);
((ArbitraryFieldsBacked)call.first).setFields(annotations);
}
return call;
@ -191,7 +191,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
public Integer reduceInit() { return 0; }
public Integer reduce(Pair<List<Genotype>, GenotypeLocusData> value, Integer sum) {
public Integer reduce(Pair<VariationCall, List<Genotype>> value, Integer sum) {
// can't call the locus because of no coverage
if ( value == null )
return sum;
@ -199,22 +199,22 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
callsMetrics.nCalledBases++;
// can't make a confident variant call here
if ( value.first == null ||
(UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED && value.first.size() == 0) ) {
if ( value.second == null ||
(UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED && value.second.size() == 0) ) {
callsMetrics.nNonConfidentCalls++;
return sum;
}
callsMetrics.nConfidentCalls++;
// if we have a single-sample call (single sample from PointEstimate model returns no genotype locus data)
if ( value.second == null || (!writer.supportsMultiSample() && samples.size() <= 1) ) {
writer.addGenotypeCall(value.first.get(0));
// if we have a single-sample call (single sample from PointEstimate model returns no VariationCall data)
if ( value.first == null || (!writer.supportsMultiSample() && samples.size() <= 1) ) {
writer.addGenotypeCall(value.second.get(0));
}
// use multi-sample mode if we have multiple samples or the output type allows it
else {
writer.addMultiSampleCall(value.first, value.second);
writer.addMultiSampleCall(value.second, value.first);
}
return sum + 1;

View File

@ -359,10 +359,12 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
}
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
Pair<List<Genotype>, GenotypeLocusData> calls = ug.map(tracker,ref,context);
if (calls == null || calls.first == null)
if ( !BaseUtils.isRegularBase(ref.getBase()) )
return false;
return (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase());
Pair<VariationCall, List<Genotype>> calls = ug.map(tracker,ref,context);
if ( calls == null || calls.second == null)
return false;
return ( calls.second.size() > 0 && !calls.second.get(0).isVariant(ref.getBase()) && calls.second.get(0).getNegLog10PError() > confidentRefThreshold );
}

View File

@ -70,14 +70,14 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
}
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
Pair<List<Genotype>, GenotypeLocusData> parent1 = UG.map(tracker, ref, parent1_subContext);
Pair<VariationCall, List<Genotype>> parent1 = UG.map(tracker, ref, parent1_subContext);
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
Pair<List<Genotype>, GenotypeLocusData> parent2 = UG.map(tracker, ref, parent2_subContext);
Pair<VariationCall, List<Genotype>> parent2 = UG.map(tracker, ref, parent2_subContext);
if ( parent1 != null && parent1.first != null && parent2 != null && parent2.first != null ) {
Genotype parent1call = parent1.first.get(0);
Genotype parent2call = parent2.first.get(0);
if ( parent1 != null && parent1.second != null && parent2 != null && parent2.second != null ) {
Genotype parent1call = parent1.second.get(0);
Genotype parent2call = parent2.second.get(0);
if (!parent1call.isVariant(parent1call.getReference()) &&
parent1call.getNegLog10PError() > 5 &&

View File

@ -80,9 +80,9 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
Pair<List<Genotype>, GenotypeLocusData> calls = UG.map(tracker, ref, subContext);
if (calls != null && calls.first != null) {
Genotype call = calls.first.get(0);
Pair<VariationCall, List<Genotype>> calls = UG.map(tracker, ref, subContext);
if (calls != null && calls.second != null && calls.second.size() > 0) {
Genotype call = calls.second.get(0);
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
}

View File

@ -8,7 +8,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeLocusData;
import org.broadinstitute.sting.utils.genotype.VariationCall;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -84,10 +85,10 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
double altBalance = ((double) altCount)/((double) totalCount);
if (altBalance > 0.70) {
Pair<List<Genotype>, GenotypeLocusData> ugResult = ug.map(tracker, ref, context);
Pair<VariationCall, List<Genotype>> ugResult = ug.map(tracker, ref, context);
if (ugResult != null && ugResult.first != null) {
return ugResult.first.get(0).isHet();
if (ugResult != null && ugResult.second != null && ugResult.second.size() > 0) {
return ugResult.second.get(0).isHet();
}
}

View File

@ -0,0 +1,188 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.*;
/**
* @author ebanks
* <p/>
* Class BasicGenotypeBackedVariation
* <p/>
* represents a genotype-backed Variation.
*/
public class BasicGenotypeBackedVariation implements Variation, VariantBackedByGenotype, ConfidenceBacked {
// the discovery lod score
private double mConfidence = 0.0;
// the location
private GenomeLoc mLoc;
// the ref base and alt bases
private char mRefBase;
private List<String> mAltBases = new ArrayList<String>();
// the variant type
private final VARIANT_TYPE mType = VARIANT_TYPE.SNP;
// the genotypes
private List<Genotype> mGenotypes = null;
/**
* create a basic genotype-backed variation bject, given the following fields
*
* @param ref the reference base
* @param loc the locus
* @param type the variant type
*/
public BasicGenotypeBackedVariation(char ref, GenomeLoc loc, VARIANT_TYPE type) {
if ( type != VARIANT_TYPE.SNP && type != VARIANT_TYPE.REFERENCE )
throw new IllegalArgumentException("Only SNPs and REFs are supported in the Geli format");
mRefBase = ref;
mLoc = loc;
}
/**
* get the reference base.
* @return a character, representing the reference base
*/
public String getReference() {
return String.valueOf(mRefBase);
}
/**
* get the genotype's location
*
* @return a GenomeLoc representing the location
*/
public GenomeLoc getLocation() {
return mLoc;
}
public boolean isBiallelic() {
return true;
}
public boolean isSNP() {
return mType == VARIANT_TYPE.SNP;
}
public boolean isInsertion() {
return false;
}
public boolean isIndel() {
return false;
}
public boolean isDeletion() {
return false;
}
public boolean isReference() {
return mType == VARIANT_TYPE.REFERENCE;
}
public VARIANT_TYPE getType() {
return mType;
}
public double getNegLog10PError() {
return mConfidence / 10.0;
}
public double getNonRefAlleleFrequency() {
return 0.0;
}
public List<String> getAlternateAlleleList() {
return mAltBases;
}
public void addAlternateAllele(String alt) {
mAltBases.add(alt);
}
public List<String> getAlleleList() {
LinkedList<String> alleles = new LinkedList<String>(mAltBases);
alleles.addFirst(getReference());
return alleles;
}
public char getAlternativeBaseForSNP() {
if ( !isSNP() )
throw new IllegalStateException("This variant is not a SNP");
if ( mAltBases.size() == 0 )
throw new IllegalStateException("No alternate alleles have been set");
return mAltBases.get(0).charAt(0);
}
public char getReferenceForSNP() {
if ( !isSNP() )
throw new IllegalStateException("This variant is not a SNP");
return mRefBase;
}
/**
* get the confidence
*
* @return the confidence
*/
public double getConfidence() {
return mConfidence;
}
/**
*
* @param confidence the confidence for this genotype
*/
public void setConfidence(double confidence) {
mConfidence = confidence;
}
/**
*
* @param calls the GenotypeCalls for this variation
*/
public void setGenotypeCalls(List<Genotype> calls) {
mGenotypes = calls;
}
/**
* @return a specific genotype that represents the called genotype
*/
public Genotype getCalledGenotype() {
if ( mGenotypes == null || mGenotypes.size() != 1 )
throw new IllegalStateException("There is not one and only one Genotype associated with this Variation");
return mGenotypes.get(0);
}
/**
* @return a list of all the genotypes
*/
public List<Genotype> getGenotypes() {
return mGenotypes;
}
/**
* do we have the specified genotype? not all backedByGenotypes
* have all the genotype data.
*
* @param x the genotype
*
* @return true if available, false otherwise
*/
public boolean hasGenotype(DiploidGenotype x) {
if ( mGenotypes == null )
return false;
for ( Genotype g : mGenotypes ) {
if ( DiploidGenotype.valueOf(g.getBases()).equals(x) )
return true;
}
return false;
}
}

View File

@ -0,0 +1,19 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* <p/>
* Class GenotypeCall
* <p/>
* represents the genotype-specific data associated with a genotype call.
*/
public interface GenotypeCall extends Genotype {
/**
*
* @param variation the Variation object associated with this genotype
*/
public void setVariation(Variation variation);
}

View File

@ -1,31 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* <p/>
* Class GenotypeLocusData
* <p/>
* represents the locus specific data associated with a genotype object.
*/
public interface GenotypeLocusData extends Variation {
/**
*
* @param alt the alternate allele base for this genotype
*/
public void addAlternateAllele(String alt);
/**
*
* @return true if the allele frequency has been set
*/
public boolean hasNonRefAlleleFrequency();
/**
*
* @param frequency the allele frequency for this genotype
*/
public void setNonRefAlleleFrequency(double frequency);
}

View File

@ -56,7 +56,7 @@ public interface GenotypeWriter {
* add a multi-sample call if we support it
* @param genotypes the list of genotypes, that are backed by sample information
*/
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData metadata);
public void addMultiSampleCall(List<Genotype> genotypes, VariationCall metadata);
/**
* @return true if we support multisample, false otherwise

View File

@ -80,7 +80,7 @@ public class GenotypeWriterFactory {
* @param loc the location
* @return an unpopulated genotype call object
*/
public static Genotype createSupportedCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) {
public static GenotypeCall createSupportedGenotypeCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) {
switch (format) {
case VCF:
return new VCFGenotypeCall(ref, loc);
@ -102,10 +102,10 @@ public class GenotypeWriterFactory {
* @param type the variant type
* @return an unpopulated genotype locus data object
*/
public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc, Variation.VARIANT_TYPE type) {
public static VariationCall createSupportedCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc, Variation.VARIANT_TYPE type) {
switch (format) {
case VCF:
return new VCFGenotypeLocusData(ref, loc, type);
return new VCFVariationCall(ref, loc, type);
case GELI:
case GELI_BINARY:
return null;

View File

@ -0,0 +1,39 @@
package org.broadinstitute.sting.utils.genotype;
import java.util.List;
/**
* @author ebanks
* <p/>
* Class VariationCall
* <p/>
* represents locus specific (non-genotype) data associated with a call plus the ability to set genotypes.
*/
public interface VariationCall extends Variation {
/**
*
* @param alt the alternate allele base for this variation
*/
public void addAlternateAllele(String alt);
/**
*
* @return true if the allele frequency has been set
*/
public boolean hasNonRefAlleleFrequency();
/**
*
* @param frequency the allele frequency for this variation
*/
public void setNonRefAlleleFrequency(double frequency);
/**
*
* @param calls the Genotypes for this variation
*/
public void setGenotypeCalls(List<Genotype> calls);
}

View File

@ -149,7 +149,7 @@ public class GeliAdapter implements GenotypeWriter {
*
* @param genotypes the list of genotypes
*/
public void addMultiSampleCall( List<Genotype> genotypes, GenotypeLocusData metadata) {
public void addMultiSampleCall( List<Genotype> genotypes, VariationCall metadata) {
throw new UnsupportedOperationException("Geli binary doesn't support multisample calls");
}

View File

@ -14,13 +14,14 @@ import java.util.Arrays;
* <p/>
* The implementation of the genotype interface, specific to Geli
*/
public class GeliGenotypeCall extends AlleleConstrainedGenotype implements ReadBacked, PosteriorsBacked {
public class GeliGenotypeCall extends AlleleConstrainedGenotype implements GenotypeCall, ReadBacked, PosteriorsBacked {
private final char mRefBase;
private final GenomeLoc mLocation;
private ReadBackedPileup mPileup = null;
private double[] mPosteriors;
private Variation mVariation = null;
// the reference genotype, the best genotype, and the next best genotype, lazy loaded
private DiploidGenotype mRefGenotype = null;
@ -77,6 +78,9 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements ReadB
mPosteriors = posteriors;
}
public void setVariation(Variation variation) {
this.mVariation = variation;
}
@Override
public boolean equals(Object other) {
@ -237,8 +241,15 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements ReadB
* @return return this genotype as a variant
*/
public Variation toVariation(char ref) {
double bestRef = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]);
return new BasicVariation(this.getBases(), String.valueOf(ref), 0, this.mLocation, bestRef);
if ( mVariation == null ) {
BasicGenotypeBackedVariation var = new BasicGenotypeBackedVariation(ref, mLocation, isVariant() ? Variation.VARIANT_TYPE.SNP : Variation.VARIANT_TYPE.REFERENCE);
double confidence = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]);
var.setConfidence(confidence);
if ( isVariant() )
var.addAlternateAllele(Character.toString(mBestGenotype.base1 != ref ? mBestGenotype.base1 : mBestGenotype.base2));
mVariation = var;
}
return mVariation;
}
/**

View File

@ -113,7 +113,7 @@ public class GeliTextWriter implements GenotypeWriter {
*
* @param genotypes the list of genotypes
*/
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData metadata) {
public void addMultiSampleCall(List<Genotype> genotypes, VariationCall metadata) {
throw new UnsupportedOperationException("Geli text doesn't support multisample calls");
}

View File

@ -14,7 +14,7 @@ import java.util.Arrays;
* <p/>
* The implementation of the genotype interface, specific to GLF
*/
public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked {
public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBacked {
private final char mRefBase;
private final GenomeLoc mLocation;
@ -24,6 +24,9 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked
private double mNegLog10PError;
private String mGenotype;
private Variation mVariation = null;
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
@ -54,6 +57,10 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked
mNegLog10PError = negLog10PError;
}
public void setVariation(Variation variation) {
this.mVariation = variation;
}
public void setGenotype(String genotype) {
mGenotype = genotype;
}
@ -120,7 +127,10 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked
* @return return this genotype as a variant
*/
public Variation toVariation(char ref) {
throw new UnsupportedOperationException("GLF call doesn't support conversion to Variation");
if ( mVariation == null ) {
mVariation = new BasicGenotypeBackedVariation(ref, mLocation, Variation.VARIANT_TYPE.REFERENCE);
}
return mVariation;
}
/**

View File

@ -289,7 +289,7 @@ public class GLFWriter implements GenotypeWriter {
*
* @param genotypes the list of genotypes
*/
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData metadata) {
public void addMultiSampleCall(List<Genotype> genotypes, VariationCall metadata) {
throw new UnsupportedOperationException("GLF writer doesn't support multisample calls");
}

View File

@ -104,7 +104,7 @@ public class TabularLFWriter implements GenotypeWriter {
* @param genotypes the list of genotypes, that are backed by sample information
*/
@Override
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData metadata) {
public void addMultiSampleCall(List<Genotype> genotypes, VariationCall metadata) {
throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls");
}

View File

@ -14,7 +14,7 @@ import java.util.Arrays;
* <p/>
* The implementation of the genotype interface, specific to VCF
*/
public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBacked, SampleBacked {
public class VCFGenotypeCall extends AlleleConstrainedGenotype implements GenotypeCall, ReadBacked, SampleBacked {
private final char mRefBase;
private final GenomeLoc mLocation;
@ -22,6 +22,7 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa
private int mCoverage = 0;
private double[] mPosteriors;
private Variation mVariation = null;
// the reference genotype, the best genotype, and the next best genotype, lazy loaded
private DiploidGenotype mRefGenotype = null;
@ -77,6 +78,10 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa
mPosteriors = posteriors;
}
public void setVariation(Variation variation) {
this.mVariation = variation;
}
public void setSampleName(String name) {
mSampleName = name;
}
@ -93,7 +98,7 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa
if (!this.mBestGenotype.equals(otherCall.mBestGenotype))
return false;
return (this.mRefBase == otherCall.mRefBase);
return (this.mRefBase == otherCall.mRefBase && this.mLocation.equals(otherCall.mLocation));
}
return false;
}
@ -214,7 +219,7 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa
* @return true if we're a variant
*/
public boolean isVariant(char ref) {
return !Utils.dupString(ref, 2).equals(getBestGenotype().toString());
return !getBestGenotype().isHomRef(ref);
}
/**
@ -231,8 +236,15 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa
* @return return this genotype as a variant
*/
public Variation toVariation(char ref) {
double bestRef = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]);
return new BasicVariation(this.getBases(), String.valueOf(ref), 0, this.mLocation, bestRef);
if ( mVariation == null ) {
VCFVariationCall var = new VCFVariationCall(ref, mLocation, isVariant() ? Variation.VARIANT_TYPE.SNP : Variation.VARIANT_TYPE.REFERENCE);
double confidence = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]);
var.setConfidence(confidence);
if ( isVariant() )
var.addAlternateAllele(Character.toString(mBestGenotype.base1 != ref ? mBestGenotype.base1 : mBestGenotype.base2));
mVariation = var;
}
return mVariation;
}
/**

View File

@ -109,9 +109,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
*
* @param genotypes the list of genotypes
*/
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData locusdata) {
if ( locusdata != null && !(locusdata instanceof VCFGenotypeLocusData) )
throw new IllegalArgumentException("Only VCFGenotypeLocusData objects should be passed in to the VCF writers");
public void addMultiSampleCall(List<Genotype> genotypes, VariationCall locusdata) {
if ( locusdata != null && !(locusdata instanceof VCFVariationCall) )
throw new IllegalArgumentException("Only VCFVariationCall objects should be passed in to the VCF writers");
VCFParameters params = new VCFParameters();
params.addFormatItem("GT");
@ -151,17 +151,17 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
}
// info fields
Map<String, String> infoFields = getInfoFields((VCFGenotypeLocusData)locusdata, params);
Map<String, String> infoFields = getInfoFields((VCFVariationCall)locusdata, params);
// q-score
double qual = (locusdata == null) ? 0 : ((VCFGenotypeLocusData)locusdata).getConfidence();
double qual = (locusdata == null) ? 0 : ((VCFVariationCall)locusdata).getConfidence();
// min Q-score is zero
qual = Math.max(qual, 0);
// dbsnp id
String dbSnpID = null;
if ( locusdata != null )
dbSnpID = ((VCFGenotypeLocusData)locusdata).getID();
dbSnpID = ((VCFVariationCall)locusdata).getID();
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
params.getContig(),
@ -185,7 +185,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
*
* @return a mapping of info field to value
*/
private static Map<String, String> getInfoFields(VCFGenotypeLocusData locusdata, VCFParameters params) {
private static Map<String, String> getInfoFields(VCFVariationCall locusdata, VCFParameters params) {
Map<String, String> infoFields = new HashMap<String, String>();
if ( locusdata != null ) {
if ( locusdata.getSLOD() != null )

View File

@ -148,7 +148,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
for (String str : headerStrings) {
Matcher matcher = pMeta.matcher(str);
if (matcher.matches()) {
String metaKey = "";
String metaKey;
String metaValue = "";
if (matcher.groupCount() < 1) continue;
if (matcher.groupCount() == 2) metaValue = matcher.group(2);
@ -300,7 +300,6 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
return this.mHeader;
}
@Override
public Iterator<VCFRecord> iterator() {
return this;
}

View File

@ -8,11 +8,11 @@ import java.util.*;
/**
* @author ebanks
* <p/>
* Class VCFGenotypeLocusData
* Class VCFVariationCall
* <p/>
* represents the meta data for a genotype object.
* represents a VCF Variation
*/
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, ArbitraryFieldsBacked {
public class VCFVariationCall implements VariationCall, VariantBackedByGenotype, ConfidenceBacked, SLODBacked, IDBacked, ArbitraryFieldsBacked {
// the discovery lod score
private double mConfidence = 0.0;
@ -36,17 +36,20 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
// the id
private String mID;
// the genotypes
private List<Genotype> mGenotypes = null;
// the various info field values
private Map<String, String> mInfoFields;
/**
* create a basic genotype meta data pbject, given the following fields
* create a VCF Variation object, given the following fields
*
* @param ref the reference base
* @param loc the locus
* @param type the variant type
*/
public VCFGenotypeLocusData(char ref, GenomeLoc loc, VARIANT_TYPE type) {
public VCFVariationCall(char ref, GenomeLoc loc, VARIANT_TYPE type) {
mRefBase = ref;
mLoc = loc;
mType = type;
@ -190,6 +193,50 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
mID = id;
}
/**
*
* @param calls the GenotypeCalls for this variation
*/
public void setGenotypeCalls(List<Genotype> calls) {
mGenotypes = calls;
}
/**
* @return a specific genotype that represents the called genotype
*/
public Genotype getCalledGenotype() {
if ( mGenotypes == null || mGenotypes.size() != 1 )
throw new IllegalStateException("There is not one and only one Genotype associated with this Variation");
return mGenotypes.get(0);
}
/**
* @return a list of all the genotypes
*/
public List<Genotype> getGenotypes() {
return mGenotypes;
}
/**
* do we have the specified genotype? not all backedByGenotypes
* have all the genotype data.
*
* @param x the genotype
*
* @return true if available, false otherwise
*/
public boolean hasGenotype(DiploidGenotype x) {
if ( mGenotypes == null )
return false;
for ( Genotype g : mGenotypes ) {
if ( DiploidGenotype.valueOf(g.getBases()).equals(x) )
return true;
}
return false;
}
/**
* @return returns te arbitrary info fields
*/

View File

@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("e401eb288c167b72f2fb3d0b3f9c22da"));
Arrays.asList("ad7024c3c880a451d2f5537797b49beb"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot2PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("ce9f37df3275ed4e7abaedf33d982889"));
Arrays.asList("7709af47dde8e127e0e36e86073e2cb1"));
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
}