From e05cb346f3573e626eccd1232ad051114d99ae70 Mon Sep 17 00:00:00 2001 From: ebanks Date: Tue, 24 Nov 2009 21:07:55 +0000 Subject: [PATCH] GenotypeLocusData now extends Variation. Also, Variations should be INSERTIONs or DELETIONs (and not just INDELs). Technically, VCF records can be indels now. More changes coming git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2150 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/RodGLF.java | 3 +- .../sting/gatk/refdata/RodVCF.java | 3 +- .../sting/gatk/refdata/SimpleIndelROD.java | 2 +- .../sting/gatk/refdata/rodPicardDbSNP.java | 5 +- .../DiploidGenotypeCalculationModel.java | 1 - .../genotyper/EMGenotypeCalculationModel.java | 6 +- ...JointEstimateGenotypeCalculationModel.java | 11 +-- ...PointEstimateGenotypeCalculationModel.java | 2 +- .../utils/genotype/AlleleFrequencyBacked.java | 23 ----- .../utils/genotype/AlternateAlleleBacked.java | 24 ----- .../sting/utils/genotype/BasicVariation.java | 2 +- .../utils/genotype/GenotypeLocusData.java | 21 ++-- .../utils/genotype/GenotypeWriterFactory.java | 5 +- .../sting/utils/genotype/Variation.java | 10 +- .../genotype/vcf/VCFGenotypeLocusData.java | 99 +++++++++++++------ .../vcf/VCFGenotypeWriterAdapter.java | 7 +- 16 files changed, 109 insertions(+), 115 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java delete mode 100755 java/src/org/broadinstitute/sting/utils/genotype/AlternateAlleleBacked.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java index f4fce2ba1..a74c6a468 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodGLF.java @@ -263,7 +263,8 @@ public class RodGLF implements VariationRod, Iterator { @Override public VARIANT_TYPE getType() { if (this.isSNP()) return VARIANT_TYPE.SNP; - else if (this.isInsertion() || 
this.isDeletion()) return VARIANT_TYPE.INDEL; + else if (this.isInsertion()) return VARIANT_TYPE.INSERTION; + else if (this.isDeletion()) return VARIANT_TYPE.DELETION; else return VARIANT_TYPE.REFERENCE; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index ad0aa6ccd..8fcef422f 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -126,7 +126,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, @Override public VARIANT_TYPE getType() { if (this.isSNP()) return VARIANT_TYPE.SNP; - else if (this.isIndel()) return VARIANT_TYPE.INDEL; + else if (this.isInsertion()) return VARIANT_TYPE.INSERTION; + else if (this.isDeletion()) return VARIANT_TYPE.DELETION; return VARIANT_TYPE.REFERENCE; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java index 7691bec2c..6586c1650 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SimpleIndelROD.java @@ -61,7 +61,7 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod /** @return the VARIANT_TYPE of the current variant */ @Override public VARIANT_TYPE getType() { - return VARIANT_TYPE.INDEL; + return isInsertion() ? 
VARIANT_TYPE.INSERTION : VARIANT_TYPE.DELETION; } public boolean isSNP() { return false; } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodPicardDbSNP.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodPicardDbSNP.java index 59e1740af..20e8258ef 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodPicardDbSNP.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodPicardDbSNP.java @@ -82,8 +82,9 @@ public class rodPicardDbSNP implements VariationRod { case SNP: return VARIANT_TYPE.SNP; case insertion: + return VARIANT_TYPE.INSERTION; case deletion: - return VARIANT_TYPE.INDEL; + return VARIANT_TYPE.DELETION; } return null; } @@ -131,7 +132,7 @@ public class rodPicardDbSNP implements VariationRod { * @return true if we're an insertion or deletion */ public boolean isIndel() { - return getType() == VARIANT_TYPE.INDEL; + return getType() == VARIANT_TYPE.INSERTION || getType() == VARIANT_TYPE.DELETION; } public String getName() { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index efa006596..4e73fc0c4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.*; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index 976deaedd..d6f91efee 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ 
b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -48,7 +48,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode } // generate the calls - GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation()); + GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP); if ( locusdata != null ) { if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); @@ -77,9 +77,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode ((SLODBacked)locusdata).setSLOD(strandScore); } - if ( locusdata instanceof AlleleFrequencyBacked ) { - ((AlleleFrequencyBacked)locusdata).setAlleleFrequency(overall.getMAF()); - } + locusdata.setAlleleFrequency(overall.getMAF()); } return new Pair, GenotypeLocusData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), locusdata); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index f5748ab26..2509218e5 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.utils.genotype.Variation.VARIANT_TYPE; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.rodDbSNP; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -314,17 +315,13 @@ public 
abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc // next, the general locus data // note that calculating strand bias involves overwriting data structures, so we do that last - GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc); + GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc, VARIANT_TYPE.SNP); if ( locusdata != null ) { + locusdata.addAlternateAllele(bestAlternateAllele.toString()); + locusdata.setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1)); if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); } - if ( locusdata instanceof AlternateAlleleBacked ) { - ((AlternateAlleleBacked)locusdata).setAlternateAllele(bestAlternateAllele); - } - if ( locusdata instanceof AlleleFrequencyBacked ) { - ((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1)); - } if ( locusdata instanceof IDBacked ) { rodDbSNP dbsnp = getDbSNP(tracker); if ( dbsnp != null ) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 5ff51a17f..a11dfdd32 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -86,7 +86,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation ((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors()); } - GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation()); + GenotypeLocusData locusdata = 
GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP); if ( locusdata != null ) { if ( locusdata instanceof ConfidenceBacked ) { ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java deleted file mode 100755 index 043ef8896..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java +++ /dev/null @@ -1,23 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -/** - * @author ebanks - * Interface AlleleFrequencyBacked - * - * this interface indicates that the genotype is - * backed up by allele frequency information. - */ -public interface AlleleFrequencyBacked { - - /** - * - * @return returns the best allele frequency for this genotype - */ - public double getAlleleFrequency(); - - /** - * - * @param frequency the allele frequency for this genotype - */ - public void setAlleleFrequency(double frequency); -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlternateAlleleBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/AlternateAlleleBacked.java deleted file mode 100755 index d085ba884..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/AlternateAlleleBacked.java +++ /dev/null @@ -1,24 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -/** - * @author ebanks - * Interface AlternateAlleleBacked - * - * this interface indicates that the genotype is - * backed up by alternate allele information. 
- */ -public interface AlternateAlleleBacked { - - /** - * - * @return returns the alternate allele for this genotype - */ - public char getAlternateAllele(); - - /** - * - * @param alt the alternate allele base for this genotype - */ - public void setAlternateAllele(char alt); - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java index 87535f52b..de1e0fac8 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicVariation.java @@ -65,7 +65,7 @@ public class BasicVariation implements Variation { */ @Override public VARIANT_TYPE getType() { - if (mLength != 0) return VARIANT_TYPE.INDEL; + if (mLength != 0) return VARIANT_TYPE.INSERTION; return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java index 45c68dbd6..cf069fc41 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.utils.GenomeLoc; - /** * @author ebanks @@ -10,19 +8,18 @@ import org.broadinstitute.sting.utils.GenomeLoc; *

* represents the locus specific data associated with a genotype object. */ -public interface GenotypeLocusData { +public interface GenotypeLocusData extends Variation { /** - * get the reference base. - * @return a character, representing the reference base - */ - public char getReference(); - - /** - * get the genotype's location * - * @return a GenomeLoc representing the location + * @param alt the alternate allele base for this genotype */ - public GenomeLoc getLocation(); + public void addAlternateAllele(String alt); + + /** + * + * @param frequency the allele frequency for this genotype + */ + public void setAlleleFrequency(double frequency); } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index b1ad6e400..0ecaad1e1 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -99,12 +99,13 @@ public class GenotypeWriterFactory { * @param format the format * @param ref the reference base * @param loc the location + * @param type the variant type * @return an unpopulated genotype locus data object */ - public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) { + public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc, Variation.VARIANT_TYPE type) { switch (format) { case VCF: - return new VCFGenotypeLocusData(ref, loc); + return new VCFGenotypeLocusData(ref, loc, type); case GELI: case GELI_BINARY: return null; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Variation.java b/java/src/org/broadinstitute/sting/utils/genotype/Variation.java index c7c8c21dd..0f4dad91a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Variation.java +++ 
b/java/src/org/broadinstitute/sting/utils/genotype/Variation.java @@ -14,10 +14,12 @@ import java.util.List; public interface Variation { // the types of variants we currently allow public enum VARIANT_TYPE { - SNP, INDEL, REFERENCE // though reference is not really a variant, we need to represent it + SNP, INSERTION, DELETION, REFERENCE // though reference is not really a variant, we need to represent it } - /** are we bi-allelic? */ + /** + * @return true if we are bi-allelic + */ public boolean isBiallelic(); /** @@ -39,7 +41,7 @@ public interface Variation { public VARIANT_TYPE getType(); /** - * are we a SNP? If not we're a Indel/deletion or the reference. This method must be call before you use + * are we a SNP? If not we're an insertion/deletion or the reference. This method must be called before you use * the convenience methods getAlternativeBaseForSNP or getReferenceForSNP, to ensure that you're working with a SNP * * @return true if we're a SNP @@ -127,7 +129,7 @@ public interface Variation { /** * gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP * - * @return a char, representing the alternate base + * @return a char, representing the reference base */ public char getReferenceForSNP(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java index acca3aae7..939876a35 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java @@ -3,8 +3,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.genotype.*; -import java.util.HashMap; -import java.util.Map; +import java.util.*; /** * @author ebanks @@ -13,7 +12,7 @@ import java.util.Map; *

* represents the meta data for a genotype object. */ -public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, AlternateAlleleBacked, AlleleFrequencyBacked, ArbitraryFieldsBacked { +public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, ArbitraryFieldsBacked { // the discovery lod score private double mConfidence = 0.0; @@ -29,7 +28,10 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked // the ref base and alt bases private char mRefBase; - private char mAltBase = 'N'; + private List mAltBases = new ArrayList(); + + // the variant type + private VARIANT_TYPE mType = VARIANT_TYPE.SNP; // the id private String mID; @@ -42,18 +44,20 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked * * @param ref the reference base * @param loc the locus + * @param type the variant type */ - public VCFGenotypeLocusData(char ref, GenomeLoc loc) { + public VCFGenotypeLocusData(char ref, GenomeLoc loc, VARIANT_TYPE type) { mRefBase = ref; mLoc = loc; + mType = type; } /** * get the reference base. 
* @return a character, representing the reference base */ - public char getReference() { - return mRefBase; + public String getReference() { + return String.valueOf(mRefBase); } /** @@ -65,20 +69,68 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked return mLoc; } - /** - * - * @return returns the alternate allele for this genotype - */ - public char getAlternateAllele() { - return mAltBase; + public boolean isBiallelic() { + return mAltBases.size() == 1; } - /** - * - * @param alt the alternate allele base for this genotype - */ - public void setAlternateAllele(char alt) { - mAltBase = alt; + public boolean isSNP() { + return mType == VARIANT_TYPE.SNP; + } + + public boolean isInsertion() { + return mType == VARIANT_TYPE.INSERTION; + } + + public boolean isIndel() { + return mType == VARIANT_TYPE.INSERTION || mType == VARIANT_TYPE.DELETION; + } + + public boolean isDeletion() { + return mType == VARIANT_TYPE.DELETION; + } + + public boolean isReference() { + return mType == VARIANT_TYPE.REFERENCE; + } + + public VARIANT_TYPE getType() { + return mType; + } + + public double getNonRefAlleleFrequency() { + return mAlleleFrequency; + } + + public double getNegLog10PError() { + return mConfidence / 10.0; + } + + public List getAlternateAlleleList() { + return mAltBases; + } + + public void addAlternateAllele(String alt) { + mAltBases.add(alt); + } + + public List getAlleleList() { + LinkedList alleles = new LinkedList(mAltBases); + alleles.addFirst(getReference()); + return alleles; + } + + public char getAlternativeBaseForSNP() { + if ( !isSNP() ) + throw new IllegalStateException("This variant is not a SNP"); + if ( mAltBases.size() == 0 ) + throw new IllegalStateException("No alternate alleles have been set"); + return mAltBases.get(0).charAt(0); + } + + public char getReferenceForSNP() { + if ( !isSNP() ) + throw new IllegalStateException("This variant is not a SNP"); + return mRefBase; } /** @@ -115,15 +167,6 @@ public class 
VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked mSLOD = slod; } - /** - * get the allele frequency - * - * @return the allele frequency - */ - public double getAlleleFrequency() { - return mAlleleFrequency; - } - /** * * @param frequency the allele frequency for this genotype diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index d1a67c64c..3939d534c 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -121,10 +121,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { if ( locusdata == null ) throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry or have locusdata"); - params.setLocations(locusdata.getLocation(), locusdata.getReference()); + params.setLocations(locusdata.getLocation(), locusdata.getReference().charAt(0)); // if there is no genotype data, we'll also need to set an alternate allele - params.addAlternateBase(new VCFGenotypeEncoding(String.valueOf(((VCFGenotypeLocusData)locusdata).getAlternateAllele()))); + if ( locusdata.isSNP() && locusdata.isBiallelic() ) + params.addAlternateBase(new VCFGenotypeEncoding(locusdata.getAlternateAlleleList().get(0))); } else { params.setLocations(genotypes.get(0).getLocation(), genotypes.get(0).getReference()); } @@ -189,7 +190,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { if ( locusdata != null ) { if ( locusdata.getSLOD() != null ) infoFields.put("SB", String.format("%.2f", locusdata.getSLOD())); - infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency())); + infoFields.put("AF", String.format("%.2f", locusdata.getNonRefAlleleFrequency())); Map otherFields = locusdata.getFields(); if ( otherFields != null ) 
{ infoFields.putAll(otherFields);