diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 7f3a20bf8..4001e5f89 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -11,9 +11,12 @@ import java.util.ArrayList; public class AlleleBalance extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { - if ( genotypes.size() == 0 ) + if ( !(variation instanceof VariantBackedByGenotype) ) + return null; + final List genotypes = ((VariantBackedByGenotype)variation).getGenotypes(); + if ( genotypes == null || genotypes.size() == 0 ) return null; double ratio; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index ae54a6147..12b02db72 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -2,15 +2,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.List; - public class DepthOfCoverage extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { int depth = pileup.getReads().size(); return String.format("%d", depth); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index b655a5196..67ed4088f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import net.sf.samtools.SAMRecord; import cern.jet.math.Arithmetic; @@ -13,7 +14,13 @@ import java.util.List; public class FisherStrand implements VariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { + + if ( !(variation instanceof VariantBackedByGenotype) ) + return null; + final List genotypes = ((VariantBackedByGenotype)variation).getGenotypes(); + if ( genotypes == null || genotypes.size() == 0 ) + return null; // this test doesn't make sense for homs Genotype genotype = VariantAnnotator.getFirstHetVariant(genotypes); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index d1e60d947..f8a27ce62 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -3,15 +3,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.List; - public class HomopolymerRun extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { if ( !variation.isBiallelic() || !variation.isSNP() ) return null; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 00ad93526..9e3dbf727 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; import net.sf.samtools.SAMRecord; @@ -11,7 +10,7 @@ import java.util.List; public class MappingQualityZero extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { List reads = pileup.getReads(); int MQ0Count = 0; for (int i=0; i < reads.size(); i++) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 0e2b365e8..7c684d3ae 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; import net.sf.samtools.SAMRecord; @@ -12,7 +11,7 @@ import java.util.List; public class RMSMappingQuality extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { List reads = pileup.getReads(); int[] qualities = new int[reads.size()]; for (int i=0; i < reads.size(); i++) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 2e98f8ce5..d245e82e2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -14,9 +14,12 @@ public class RankSumTest implements VariantAnnotation { private final static boolean DEBUG = false; private static final double minPValue = 1e-10; - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { - if ( genotypes.size() == 0 ) + if ( !(variation instanceof VariantBackedByGenotype) ) + return null; + final List genotypes = ((VariantBackedByGenotype)variation).getGenotypes(); + if ( genotypes == null || genotypes.size() == 0 ) return null; // this test doesn't make sense for homs diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java index 35fd2aa23..20cec5344 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java @@ -4,11 +4,9 @@ import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import java.util.List; /** * Created by IntelliJ IDEA. @@ -30,7 +28,7 @@ public class SecondBaseSkew implements VariantAnnotation { public String getDescription() { return KEY_NAME + ",1,Float,\"Chi-square Secondary Base Skew\""; } - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { if ( variation.isSNP() && variation.isBiallelic() ) { char snp = variation.getAlternativeBaseForSNP(); Pair depthProp = getSecondaryPileupNonrefEstimator(ref.getBase(), pileup, snp); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index 20c89e188..0fb09a693 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -2,15 +2,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.List; - public class SpanningDeletions extends StandardVariantAnnotation { - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes) { + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { int deletions = pileup.getNumberOfDeletions(); return String.format("%.2f", (double)deletions/(double)pileup.size()); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java index 55f1324d0..4ba6c6ba8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotation.java @@ -2,15 +2,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.List; public interface VariantAnnotation { // return the annotation for the given locus data (return null for no annotation) - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation, List genotypes); + public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation); // return true if you want to use a context with mapping quality zero reads, false otherwise public boolean useZeroQualityReads(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 85d9ecfc1..b43b0bc97 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -164,12 +164,8 @@ public class VariantAnnotator extends RodWalker { // if the reference base is not ambiguous, the variant is a SNP, and it's the appropriate type, we can annotate if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 && variant.isBiallelic() && - variant.isSNP() && - variant instanceof VariantBackedByGenotype ) { - final List genotypes = ((VariantBackedByGenotype)variant).getGenotypes(); - if ( genotypes != null ) - annotations = getAnnotations(ref, context, variant, genotypes, requestedAnnotations); - } + variant.isSNP() ) + annotations = getAnnotations(ref, context, variant, requestedAnnotations); writeVCF(tracker, ref, context, variant, annotations); @@ -177,30 +173,30 @@ public class VariantAnnotator extends RodWalker { } // option #1: don't specify annotations to be used: standard annotations are used by default - public static Map getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List genotypes) { + public static Map getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation) { if ( standardAnnotations == null ) determineAllAnnotations(); - return getAnnotations(ref, context, variation, genotypes, standardAnnotations.values()); + return getAnnotations(ref, context, variation, standardAnnotations.values()); } // option #2: specify that all possible annotations be used - public static Map getAllAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List genotypes) { + public static Map getAllAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation) { if ( allAnnotations == null ) determineAllAnnotations(); - return getAnnotations(ref, context, variation, genotypes, allAnnotations.values()); + return getAnnotations(ref, context, variation, allAnnotations.values()); } // option #3: specify the exact annotations to be used - public static Map getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, List genotypes, Collection annotations) { + public static Map getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation, Collection annotations) { + + HashMap results = new HashMap(); // set up the pileup for the full collection of reads at this position ReadBackedPileup fullPileup = context.getPileup(); ReadBackedPileup MQ0freePileup = fullPileup.getPileupWithoutMappingQualityZeroReads(); - HashMap results = new HashMap(); - for ( VariantAnnotation annotator : annotations) { - String annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), variation, genotypes); + String annot = annotator.annotate(ref, (annotator.useZeroQualityReads() ? fullPileup : MQ0freePileup), variation); if ( annot != null ) { results.put(annotator.getKeyName(), annot); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index d04334b0c..8985380ef 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -79,7 +79,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul for ( String sample : GLs.keySet() ) { // create the call - Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc); + GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, loc); if ( call instanceof ReadBacked ) { ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index 7be9d92d6..1f2e71bf9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -19,7 +19,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode protected EMGenotypeCalculationModel() {} - public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // keep track of the context for each sample, overall and separated by strand HashMap contexts = splitContextBySample(context); @@ -45,11 +45,13 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode // are we above the lod threshold for emitting calls (and not in all-bases mode)? if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) { - return new Pair, GenotypeLocusData>(null, null); + return new Pair>(null, null); } // generate the calls - GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP); + List calls = genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts); + + VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation(), bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP); if ( locusdata != null ) { if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); @@ -79,8 +81,13 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode ((SLODBacked)locusdata).setSLOD(strandScore); } locusdata.setNonRefAlleleFrequency(overall.getMAF()); + + // finally, associate the Variation with the Genotypes + locusdata.setGenotypeCalls(calls); + for ( Genotype call : calls ) + ((GenotypeCall)call).setVariation(locusdata); } - return new Pair, GenotypeLocusData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), locusdata); + return new Pair>(locusdata, calls); } protected List genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap contexts) { @@ -93,7 +100,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode // create the call AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL); - Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation()); + GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation()); if ( call instanceof ReadBacked ) { ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedContext.OVERALL).getPileup(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 9bf1f8595..8baa18c0f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -7,6 +7,8 @@ import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.apache.log4j.Logger; import java.io.*; @@ -102,10 +104,10 @@ public abstract class GenotypeCalculationModel implements Cloneable { * * @return calls */ - public abstract Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, - char ref, - AlignmentContext context, - DiploidGenotypePriors priors); + public abstract Pair> calculateGenotype(RefMetaDataTracker tracker, + char ref, + AlignmentContext context, + DiploidGenotypePriors priors); /** * @param tracker rod data @@ -137,15 +139,11 @@ public abstract class GenotypeCalculationModel implements Cloneable { HashMap contexts = new HashMap(); - int deletionsInPileup = 0; - List reads = context.getReads(); - List offsets = context.getOffsets(); - - for (int i = 0; i < reads.size(); i++) { + ReadBackedPileup pileup = context.getPileup(); + for (PileupElement p : pileup ) { // get the read and offset - SAMRecord read = reads.get(i); - int offset = offsets.get(i); + SAMRecord read = p.getRead(); // find the sample String sample; @@ -165,19 +163,15 @@ public abstract class GenotypeCalculationModel implements Cloneable { contexts.put(sample, myContext); } - // check for deletions - if ( offset == -1 ) - deletionsInPileup++; - // add the read to this sample's context // note that bad bases are added to the context (for DoC calculations later) - myContext.add(read, offset); + myContext.add(read, p.getOffset()); } // are there too many deletions in the pileup? if ( maxDeletionFractionInPileup != -1.0 && - (double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup ) + (double)pileup.getNumberOfDeletions() / (double)pileup.size() > maxDeletionFractionInPileup ) return null; return contexts; @@ -221,19 +215,19 @@ public abstract class GenotypeCalculationModel implements Cloneable { private AlignmentContext getOverallContext() { if ( overall == null ) - overall = new AlignmentContext(loc, allReads, allOffsets); + overall = new AlignmentContext(loc, new ReadBackedPileup(loc, allReads, allOffsets)); return overall; } private AlignmentContext getForwardContext() { if ( forward == null ) - forward = new AlignmentContext(loc, forwardReads, forwardOffsets); + forward = new AlignmentContext(loc, new ReadBackedPileup(loc, forwardReads, forwardOffsets)); return forward; } private AlignmentContext getReverseContext() { if ( reverse == null ) - reverse = new AlignmentContext(loc, reverseReads, reverseOffsets); + reverse = new AlignmentContext(loc, new ReadBackedPileup(loc, reverseReads, reverseOffsets)); return reverse; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index 4d1f04668..acf56d6ca 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -35,7 +35,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc protected JointEstimateGenotypeCalculationModel() {} - public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // keep track of the context for each sample, overall and separated by strand HashMap contexts = createContexts(context); @@ -49,7 +49,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc initializeBestAlternateAllele(ref, context); // if there are no non-ref bases and we don't want all bases, then we can just return if ( !ALL_BASE_MODE && bestAlternateAllele == null ) - return new Pair, GenotypeLocusData>(null, null); + return new Pair>(null, null); initializeAlleleFrequencies(frequencyEstimationPoints); @@ -302,7 +302,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc return new ArrayList(); } - protected Pair, GenotypeLocusData> createCalls(RefMetaDataTracker tracker, char ref, HashMap contexts, GenomeLoc loc, int frequencyEstimationPoints) { + protected Pair> createCalls(RefMetaDataTracker tracker, char ref, HashMap contexts, GenomeLoc loc, int frequencyEstimationPoints) { // only need to look at the most likely alternate allele int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele); @@ -313,14 +313,14 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero if ( !ALL_BASE_MODE && (bestAFguess == 0 || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) - return new Pair, GenotypeLocusData>(null, null); + return new Pair>(null, null); // populate the sample-specific data List calls = makeGenotypeCalls(ref, bestAlternateAllele, contexts, loc); // next, the general locus data - // note that calculating strand bias involves overwriting data structures, so we do that last - GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc, VARIANT_TYPE.SNP); + // *** note that calculating strand bias involves overwriting data structures, so we do that last + VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP); if ( locusdata != null ) { locusdata.addAlternateAllele(bestAlternateAllele.toString()); locusdata.setNonRefAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1)); @@ -373,6 +373,13 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc } } - return new Pair, GenotypeLocusData>(calls, locusdata); + // finally, associate the Variation with the Genotypes (if available) + if ( locusdata != null ) { + locusdata.setGenotypeCalls(calls); + for ( Genotype call : calls ) + ((GenotypeCall)call).setVariation(locusdata); + } + + return new Pair>(locusdata, calls); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 185af1f98..3880266ec 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -26,7 +26,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // overload this method so we can special-case the single sample - public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // we don't actually want to run EM for single samples if ( samples.size() == 1 ) { @@ -45,7 +45,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation return null; // get the genotype likelihoods - Pair discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL); + Pair discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedContext.OVERALL); // find the index of the best genotype double[] posteriors = discoveryGL.second.getNormalizedPosteriors(); @@ -68,11 +68,12 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // are we above the lod threshold for emitting calls (and not in all-bases mode)? if ( !ALL_BASE_MODE && (bestIsRef || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) { - return new Pair, GenotypeLocusData>(null, null); + return new Pair>(null, null); } // we can now create the genotype call object - Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation()); + GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation()); + call.setVariation(null); if ( call instanceof ReadBacked ) { ((ReadBacked)call).setPileup(discoveryGL.first); @@ -87,7 +88,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation ((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors()); } - GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP); + VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, context.getLocation(), bestIsRef ? Variation.VARIANT_TYPE.REFERENCE : Variation.VARIANT_TYPE.SNP); if ( locusdata != null ) { if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); @@ -99,13 +100,13 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation } } - return new Pair, GenotypeLocusData>(Arrays.asList(call), locusdata); + return new Pair>(locusdata, Arrays.asList((Genotype)call)); } return super.calculateGenotype(tracker, ref, context, priors); } - private Pair getSingleSampleLikelihoods(char ref, AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) { + private Pair getSingleSampleLikelihoods(AlignmentContextBySample sampleContext, DiploidGenotypePriors priors, StratifiedContext contextType) { // create the pileup AlignmentContext myContext = sampleContext.getContext(contextType); ReadBackedPileup pileup = myContext.getPileup(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index a90c75928..9e4b670c2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -44,7 +44,7 @@ import java.util.*; @Reference(window=@Window(start=-20,stop=20)) -public class UnifiedGenotyper extends LocusWalker, GenotypeLocusData>, Integer> { +public class UnifiedGenotyper extends LocusWalker>, Integer> { @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @@ -154,7 +154,7 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeL * @param refContext the reference base * @param fullContext contextual information around the locus */ - public Pair, GenotypeLocusData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext fullContext) { + public Pair> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext fullContext) { char ref = Character.toUpperCase(refContext.getBase()); if ( !BaseUtils.isRegularBase(ref) ) return null; @@ -168,16 +168,16 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeL return null; DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); - Pair, GenotypeLocusData> call = gcm.calculateGenotype(tracker, ref, MQ0freeContext, priors); + Pair> call = gcm.calculateGenotype(tracker, ref, MQ0freeContext, priors); // annotate the call, if possible - if ( call != null && call.second != null && call.second instanceof ArbitraryFieldsBacked ) { + if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) { Map annotations; if ( UAC.ALL_ANNOTATIONS ) - annotations = VariantAnnotator.getAllAnnotations(refContext, fullContext, call.second, call.first); + annotations = VariantAnnotator.getAllAnnotations(refContext, fullContext, call.first); else - annotations = VariantAnnotator.getAnnotations(refContext, fullContext, call.second, call.first); - ((ArbitraryFieldsBacked)call.second).setFields(annotations); + annotations = VariantAnnotator.getAnnotations(refContext, fullContext, call.first); + ((ArbitraryFieldsBacked)call.first).setFields(annotations); } return call; @@ -191,7 +191,7 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeL public Integer reduceInit() { return 0; } - public Integer reduce(Pair, GenotypeLocusData> value, Integer sum) { + public Integer reduce(Pair> value, Integer sum) { // can't call the locus because of no coverage if ( value == null ) return sum; @@ -199,22 +199,22 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeL callsMetrics.nCalledBases++; // can't make a confident variant call here - if ( value.first == null || - (UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED && value.first.size() == 0) ) { + if ( value.second == null || + (UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED && value.second.size() == 0) ) { callsMetrics.nNonConfidentCalls++; return sum; } callsMetrics.nConfidentCalls++; - // if we have a single-sample call (single sample from PointEstimate model returns no genotype locus data) - if ( value.second == null || (!writer.supportsMultiSample() && samples.size() <= 1) ) { - writer.addGenotypeCall(value.first.get(0)); + // if we have a single-sample call (single sample from PointEstimate model returns no VariationCall data) + if ( value.first == null || (!writer.supportsMultiSample() && samples.size() <= 1) ) { + writer.addGenotypeCall(value.second.get(0)); } // use multi-sample mode if we have multiple samples or the output type allows it else { - writer.addMultiSampleCall(value.first, value.second); + writer.addMultiSampleCall(value.second, value.first); } return sum + 1; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BaseTransitionTableCalculatorJavaWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BaseTransitionTableCalculatorJavaWalker.java index 7df8fe7af..73fef1581 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BaseTransitionTableCalculatorJavaWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BaseTransitionTableCalculatorJavaWalker.java @@ -359,10 +359,12 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker, GenotypeLocusData> calls = ug.map(tracker,ref,context); - if (calls == null || calls.first == null) + if ( !BaseUtils.isRegularBase(ref.getBase()) ) return false; - return (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase()); + Pair> calls = ug.map(tracker,ref,context); + if ( calls == null || calls.second == null) + return false; + return ( calls.second.size() > 0 && !calls.second.get(0).isVariant(ref.getBase()) && calls.second.get(0).getNegLog10PError() > confidentRefThreshold ); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java index ff97a1c2b..9fbe2c3e3 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java @@ -70,14 +70,14 @@ public class DeNovoSNPWalker extends RefWalker{ } AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets); - Pair, GenotypeLocusData> parent1 = UG.map(tracker, ref, parent1_subContext); + Pair> parent1 = UG.map(tracker, ref, parent1_subContext); AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets); - Pair, GenotypeLocusData> parent2 = UG.map(tracker, ref, parent2_subContext); + Pair> parent2 = UG.map(tracker, ref, parent2_subContext); - if ( parent1 != null && parent1.first != null && parent2 != null && parent2.first != null ) { - Genotype parent1call = parent1.first.get(0); - Genotype parent2call = parent2.first.get(0); + if ( parent1 != null && parent1.second != null && parent2 != null && parent2.second != null ) { + Genotype parent1call = parent1.second.get(0); + Genotype parent2call = parent2.second.get(0); if (!parent1call.isVariant(parent1call.getReference()) && parent1call.getNegLog10PError() > 5 && diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java index 1a0258d4d..52989b833 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SnpCallRateByCoverageWalker.java @@ -80,9 +80,9 @@ public class SnpCallRateByCoverageWalker extends LocusWalker, Strin List sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets); AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets); - Pair, GenotypeLocusData> calls = UG.map(tracker, ref, subContext); - if (calls != null && calls.first != null) { - Genotype call = calls.first.get(0); + Pair> calls = UG.map(tracker, ref, subContext); + if (calls != null && calls.second != null && calls.second.size() > 0) { + Genotype call = calls.second.get(0); String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call)); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java index 08f0b09a1..bd91a8a22 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/contamination/FindContaminatingReadGroupsWalker.java @@ -8,7 +8,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.GenotypeLocusData; +import org.broadinstitute.sting.utils.genotype.VariationCall; +import org.broadinstitute.sting.utils.genotype.GenotypeCall; import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -84,10 +85,10 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker 0.70) { - Pair, GenotypeLocusData> ugResult = ug.map(tracker, ref, context); + Pair> ugResult = ug.map(tracker, ref, context); - if (ugResult != null && ugResult.first != null) { - return ugResult.first.get(0).isHet(); + if (ugResult != null && ugResult.second != null && ugResult.second.size() > 0) { + return ugResult.second.get(0).isHet(); } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotypeBackedVariation.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotypeBackedVariation.java new file mode 100755 index 000000000..74ffccb24 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotypeBackedVariation.java @@ -0,0 +1,188 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.GenomeLoc; + +import java.util.*; + +/** + * @author ebanks + *

+ * Class BasicGenotypeBackedVariation + *

+ * represents a genotype-backed Variation. + */ +public class BasicGenotypeBackedVariation implements Variation, VariantBackedByGenotype, ConfidenceBacked { + + // the discovery lod score + private double mConfidence = 0.0; + + // the location + private GenomeLoc mLoc; + + // the ref base and alt bases + private char mRefBase; + private List mAltBases = new ArrayList(); + + // the variant type + private final VARIANT_TYPE mType = VARIANT_TYPE.SNP; + + // the genotypes + private List mGenotypes = null; + + + /** + * create a basic genotype-backed variation bject, given the following fields + * + * @param ref the reference base + * @param loc the locus + * @param type the variant type + */ + public BasicGenotypeBackedVariation(char ref, GenomeLoc loc, VARIANT_TYPE type) { + if ( type != VARIANT_TYPE.SNP && type != VARIANT_TYPE.REFERENCE ) + throw new IllegalArgumentException("Only SNPs and REFs are supported in the Geli format"); + mRefBase = ref; + mLoc = loc; + } + + /** + * get the reference base. + * @return a character, representing the reference base + */ + public String getReference() { + return String.valueOf(mRefBase); + } + + /** + * get the genotype's location + * + * @return a GenomeLoc representing the location + */ + public GenomeLoc getLocation() { + return mLoc; + } + + public boolean isBiallelic() { + return true; + } + + public boolean isSNP() { + return mType == VARIANT_TYPE.SNP; + } + + public boolean isInsertion() { + return false; + } + + public boolean isIndel() { + return false; + } + + public boolean isDeletion() { + return false; + } + + public boolean isReference() { + return mType == VARIANT_TYPE.REFERENCE; + } + + public VARIANT_TYPE getType() { + return mType; + } + + public double getNegLog10PError() { + return mConfidence / 10.0; + } + + public double getNonRefAlleleFrequency() { + return 0.0; + } + + public List getAlternateAlleleList() { + return mAltBases; + } + + public void addAlternateAllele(String alt) { + mAltBases.add(alt); + } + + public List getAlleleList() { + LinkedList alleles = new LinkedList(mAltBases); + alleles.addFirst(getReference()); + return alleles; + } + + public char getAlternativeBaseForSNP() { + if ( !isSNP() ) + throw new IllegalStateException("This variant is not a SNP"); + if ( mAltBases.size() == 0 ) + throw new IllegalStateException("No alternate alleles have been set"); + return mAltBases.get(0).charAt(0); + } + + public char getReferenceForSNP() { + if ( !isSNP() ) + throw new IllegalStateException("This variant is not a SNP"); + return mRefBase; + } + + /** + * get the confidence + * + * @return the confidence + */ + public double getConfidence() { + return mConfidence; + } + + /** + * + * @param confidence the confidence for this genotype + */ + public void setConfidence(double confidence) { + mConfidence = confidence; + } + + /** + * + * @param calls the GenotypeCalls for this variation + */ + public void setGenotypeCalls(List calls) { + mGenotypes = calls; + } + + /** + * @return a specific genotype that represents the called genotype + */ + public Genotype getCalledGenotype() { + if ( mGenotypes == null || mGenotypes.size() != 1 ) + throw new IllegalStateException("There is not one and only one Genotype associated with this Variation"); + return mGenotypes.get(0); + } + + /** + * @return a list of all the genotypes + */ + public List getGenotypes() { + return mGenotypes; + } + + /** + * do we have the specified genotype? not all backedByGenotypes + * have all the genotype data. + * + * @param x the genotype + * + * @return true if available, false otherwise + */ + public boolean hasGenotype(DiploidGenotype x) { + if ( mGenotypes == null ) + return false; + + for ( Genotype g : mGenotypes ) { + if ( DiploidGenotype.valueOf(g.getBases()).equals(x) ) + return true; + } + + return false; + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java new file mode 100755 index 000000000..6f037bdcc --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java @@ -0,0 +1,19 @@ +package org.broadinstitute.sting.utils.genotype; + + +/** + * @author ebanks + *

+ * Class GenotypeCall + *

+ * represents the genotype-specific data associated with a genotype call. + */ +public interface GenotypeCall extends Genotype { + + /** + * + * @param variation the Variation object associated with this genotype + */ + public void setVariation(Variation variation); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java deleted file mode 100755 index edd0259a7..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java +++ /dev/null @@ -1,31 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - - -/** - * @author ebanks - *

- * Class GenotypeLocusData - *

- * represents the locus specific data associated with a genotype object. - */ -public interface GenotypeLocusData extends Variation { - - /** - * - * @param alt the alternate allele base for this genotype - */ - public void addAlternateAllele(String alt); - - /** - * - * @return true if the allele frequency has been set - */ - public boolean hasNonRefAlleleFrequency(); - - /** - * - * @param frequency the allele frequency for this genotype - */ - public void setNonRefAlleleFrequency(double frequency); - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 796654d93..4f8f5f0a7 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -56,7 +56,7 @@ public interface GenotypeWriter { * add a multi-sample call if we support it * @param genotypes the list of genotypes, that are backed by sample information */ - public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata); + public void addMultiSampleCall(List genotypes, VariationCall metadata); /** * @return true if we support multisample, false otherwise diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index 0ecaad1e1..19f585856 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -80,7 +80,7 @@ public class GenotypeWriterFactory { * @param loc the location * @return an unpopulated genotype call object */ - public static Genotype createSupportedCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) { + public static GenotypeCall createSupportedGenotypeCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) { switch (format) { case VCF: return new VCFGenotypeCall(ref, loc); @@ -102,10 +102,10 @@ public class GenotypeWriterFactory { * @param type the variant type * @return an unpopulated genotype locus data object */ - public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc, Variation.VARIANT_TYPE type) { + public static VariationCall createSupportedCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc, Variation.VARIANT_TYPE type) { switch (format) { case VCF: - return new VCFGenotypeLocusData(ref, loc, type); + return new VCFVariationCall(ref, loc, type); case GELI: case GELI_BINARY: return null; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/VariationCall.java b/java/src/org/broadinstitute/sting/utils/genotype/VariationCall.java new file mode 100755 index 000000000..18bf7a7b1 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/VariationCall.java @@ -0,0 +1,39 @@ +package org.broadinstitute.sting.utils.genotype; + + +import java.util.List; + +/** + * @author ebanks + *

+ * Class VariationCall + *

+ * represents locus specific (non-genotype) data associated with a call plus the ability to set genotypes. + */ +public interface VariationCall extends Variation { + + /** + * + * @param alt the alternate allele base for this variation + */ + public void addAlternateAllele(String alt); + + /** + * + * @return true if the allele frequency has been set + */ + public boolean hasNonRefAlleleFrequency(); + + /** + * + * @param frequency the allele frequency for this variation + */ + public void setNonRefAlleleFrequency(double frequency); + + /** + * + * @param calls the Genotypes for this variation + */ + public void setGenotypeCalls(List calls); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 02fc68f5d..2f101973f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -149,7 +149,7 @@ public class GeliAdapter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall( List genotypes, GenotypeLocusData metadata) { + public void addMultiSampleCall( List genotypes, VariationCall metadata) { throw new UnsupportedOperationException("Geli binary doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java index 808dba44e..e9d803926 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java @@ -14,13 +14,14 @@ import java.util.Arrays; *

* The implementation of the genotype interface, specific to Geli */ -public class GeliGenotypeCall extends AlleleConstrainedGenotype implements ReadBacked, PosteriorsBacked { +public class GeliGenotypeCall extends AlleleConstrainedGenotype implements GenotypeCall, ReadBacked, PosteriorsBacked { private final char mRefBase; private final GenomeLoc mLocation; private ReadBackedPileup mPileup = null; private double[] mPosteriors; + private Variation mVariation = null; // the reference genotype, the best genotype, and the next best genotype, lazy loaded private DiploidGenotype mRefGenotype = null; @@ -77,6 +78,9 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements ReadB mPosteriors = posteriors; } + public void setVariation(Variation variation) { + this.mVariation = variation; + } @Override public boolean equals(Object other) { @@ -237,8 +241,15 @@ public class GeliGenotypeCall extends AlleleConstrainedGenotype implements ReadB * @return return this genotype as a variant */ public Variation toVariation(char ref) { - double bestRef = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]); - return new BasicVariation(this.getBases(), String.valueOf(ref), 0, this.mLocation, bestRef); + if ( mVariation == null ) { + BasicGenotypeBackedVariation var = new BasicGenotypeBackedVariation(ref, mLocation, isVariant() ? Variation.VARIANT_TYPE.SNP : Variation.VARIANT_TYPE.REFERENCE); + double confidence = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]); + var.setConfidence(confidence); + if ( isVariant() ) + var.addAlternateAllele(Character.toString(mBestGenotype.base1 != ref ? mBestGenotype.base1 : mBestGenotype.base2)); + mVariation = var; + } + return mVariation; } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index 864471b4b..a88b9a278 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -113,7 +113,7 @@ public class GeliTextWriter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata) { + public void addMultiSampleCall(List genotypes, VariationCall metadata) { throw new UnsupportedOperationException("Geli text doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java index b919a02d3..d55da57b9 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java @@ -14,7 +14,7 @@ import java.util.Arrays; *

* The implementation of the genotype interface, specific to GLF */ -public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked { +public class GLFGenotypeCall implements GenotypeCall, ReadBacked, LikelihoodsBacked { private final char mRefBase; private final GenomeLoc mLocation; @@ -24,6 +24,9 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked private double mNegLog10PError; private String mGenotype; + private Variation mVariation = null; + + /** * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object * @@ -54,6 +57,10 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked mNegLog10PError = negLog10PError; } + public void setVariation(Variation variation) { + this.mVariation = variation; + } + public void setGenotype(String genotype) { mGenotype = genotype; } @@ -120,7 +127,10 @@ public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked * @return return this genotype as a variant */ public Variation toVariation(char ref) { - throw new UnsupportedOperationException("GLF call doesn't support conversion to Variation"); + if ( mVariation == null ) { + mVariation = new BasicGenotypeBackedVariation(ref, mLocation, Variation.VARIANT_TYPE.REFERENCE); + } + return mVariation; } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index d87a79dd1..17680eeeb 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -289,7 +289,7 @@ public class GLFWriter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata) { + public void addMultiSampleCall(List genotypes, VariationCall metadata) { throw new UnsupportedOperationException("GLF writer doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java index f563d1de5..17199dd08 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java @@ -104,7 +104,7 @@ public class TabularLFWriter implements GenotypeWriter { * @param genotypes the list of genotypes, that are backed by sample information */ @Override - public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata) { + public void addMultiSampleCall(List genotypes, VariationCall metadata) { throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java index eeec0bdd2..f59eeb54a 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -14,7 +14,7 @@ import java.util.Arrays; *

* The implementation of the genotype interface, specific to VCF */ -public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBacked, SampleBacked { +public class VCFGenotypeCall extends AlleleConstrainedGenotype implements GenotypeCall, ReadBacked, SampleBacked { private final char mRefBase; private final GenomeLoc mLocation; @@ -22,6 +22,7 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa private int mCoverage = 0; private double[] mPosteriors; + private Variation mVariation = null; // the reference genotype, the best genotype, and the next best genotype, lazy loaded private DiploidGenotype mRefGenotype = null; @@ -77,6 +78,10 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa mPosteriors = posteriors; } + public void setVariation(Variation variation) { + this.mVariation = variation; + } + public void setSampleName(String name) { mSampleName = name; } @@ -93,7 +98,7 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa if (!this.mBestGenotype.equals(otherCall.mBestGenotype)) return false; - return (this.mRefBase == otherCall.mRefBase); + return (this.mRefBase == otherCall.mRefBase && this.mLocation.equals(otherCall.mLocation)); } return false; } @@ -214,7 +219,7 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa * @return true if we're a variant */ public boolean isVariant(char ref) { - return !Utils.dupString(ref, 2).equals(getBestGenotype().toString()); + return !getBestGenotype().isHomRef(ref); } /** @@ -231,8 +236,15 @@ public class VCFGenotypeCall extends AlleleConstrainedGenotype implements ReadBa * @return return this genotype as a variant */ public Variation toVariation(char ref) { - double bestRef = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]); - return new BasicVariation(this.getBases(), String.valueOf(ref), 0, this.mLocation, bestRef); + if ( mVariation == null ) { + VCFVariationCall var = new VCFVariationCall(ref, mLocation, isVariant() ? Variation.VARIANT_TYPE.SNP : Variation.VARIANT_TYPE.REFERENCE); + double confidence = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]); + var.setConfidence(confidence); + if ( isVariant() ) + var.addAlternateAllele(Character.toString(mBestGenotype.base1 != ref ? mBestGenotype.base1 : mBestGenotype.base2)); + mVariation = var; + } + return mVariation; } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 519385f78..a329f5bca 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -109,9 +109,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall(List genotypes, GenotypeLocusData locusdata) { - if ( locusdata != null && !(locusdata instanceof VCFGenotypeLocusData) ) - throw new IllegalArgumentException("Only VCFGenotypeLocusData objects should be passed in to the VCF writers"); + public void addMultiSampleCall(List genotypes, VariationCall locusdata) { + if ( locusdata != null && !(locusdata instanceof VCFVariationCall) ) + throw new IllegalArgumentException("Only VCFVariationCall objects should be passed in to the VCF writers"); VCFParameters params = new VCFParameters(); params.addFormatItem("GT"); @@ -151,17 +151,17 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { } // info fields - Map infoFields = getInfoFields((VCFGenotypeLocusData)locusdata, params); + Map infoFields = getInfoFields((VCFVariationCall)locusdata, params); // q-score - double qual = (locusdata == null) ? 0 : ((VCFGenotypeLocusData)locusdata).getConfidence(); + double qual = (locusdata == null) ? 0 : ((VCFVariationCall)locusdata).getConfidence(); // min Q-score is zero qual = Math.max(qual, 0); // dbsnp id String dbSnpID = null; if ( locusdata != null ) - dbSnpID = ((VCFGenotypeLocusData)locusdata).getID(); + dbSnpID = ((VCFVariationCall)locusdata).getID(); VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(), params.getContig(), @@ -185,7 +185,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * * @return a mapping of info field to value */ - private static Map getInfoFields(VCFGenotypeLocusData locusdata, VCFParameters params) { + private static Map getInfoFields(VCFVariationCall locusdata, VCFParameters params) { Map infoFields = new HashMap(); if ( locusdata != null ) { if ( locusdata.getSLOD() != null ) diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index 8aabacdbd..f789affd2 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -148,7 +148,7 @@ public class VCFReader implements Iterator, Iterable { for (String str : headerStrings) { Matcher matcher = pMeta.matcher(str); if (matcher.matches()) { - String metaKey = ""; + String metaKey; String metaValue = ""; if (matcher.groupCount() < 1) continue; if (matcher.groupCount() == 2) metaValue = matcher.group(2); @@ -300,7 +300,6 @@ public class VCFReader implements Iterator, Iterable { return this.mHeader; } - @Override public Iterator iterator() { return this; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFVariationCall.java similarity index 73% rename from java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java rename to java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFVariationCall.java index 19c9eeb30..0a2ccab16 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFVariationCall.java @@ -8,11 +8,11 @@ import java.util.*; /** * @author ebanks *

- * Class VCFGenotypeLocusData + * Class VCFVariationCall *

- * represents the meta data for a genotype object. + * represents a VCF Variation */ -public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, ArbitraryFieldsBacked { +public class VCFVariationCall implements VariationCall, VariantBackedByGenotype, ConfidenceBacked, SLODBacked, IDBacked, ArbitraryFieldsBacked { // the discovery lod score private double mConfidence = 0.0; @@ -36,17 +36,20 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked // the id private String mID; + // the genotypes + private List mGenotypes = null; + // the various info field values private Map mInfoFields; /** - * create a basic genotype meta data pbject, given the following fields + * create a VCF Variation object, given the following fields * * @param ref the reference base * @param loc the locus * @param type the variant type */ - public VCFGenotypeLocusData(char ref, GenomeLoc loc, VARIANT_TYPE type) { + public VCFVariationCall(char ref, GenomeLoc loc, VARIANT_TYPE type) { mRefBase = ref; mLoc = loc; mType = type; @@ -190,6 +193,50 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked mID = id; } + /** + * + * @param calls the GenotypeCalls for this variation + */ + public void setGenotypeCalls(List calls) { + mGenotypes = calls; + } + + /** + * @return a specific genotype that represents the called genotype + */ + public Genotype getCalledGenotype() { + if ( mGenotypes == null || mGenotypes.size() != 1 ) + throw new IllegalStateException("There is not one and only one Genotype associated with this Variation"); + return mGenotypes.get(0); + } + + /** + * @return a list of all the genotypes + */ + public List getGenotypes() { + return mGenotypes; + } + + /** + * do we have the specified genotype? not all backedByGenotypes + * have all the genotype data. + * + * @param x the genotype + * + * @return true if available, false otherwise + */ + public boolean hasGenotype(DiploidGenotype x) { + if ( mGenotypes == null ) + return false; + + for ( Genotype g : mGenotypes ) { + if ( DiploidGenotype.valueOf(g.getBases()).equals(x) ) + return true; + } + + return false; + } + /** * @return returns te arbitrary info fields */ diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index fda9aac5f..49a3bf108 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("e401eb288c167b72f2fb3d0b3f9c22da")); + Arrays.asList("ad7024c3c880a451d2f5537797b49beb")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("ce9f37df3275ed4e7abaedf33d982889")); + Arrays.asList("7709af47dde8e127e0e36e86073e2cb1")); executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); }