GenotypeLocusData now extends Variation.

Also, Variations should be INSERTIONs or DELETIONs (and not just INDELs).
Technically, VCF records can be indels now.
More changes coming


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2150 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-24 21:07:55 +00:00
parent 8b30279edc
commit e05cb346f3
16 changed files with 109 additions and 115 deletions

View File

@ -263,7 +263,8 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
@Override
public VARIANT_TYPE getType() {
if (this.isSNP()) return VARIANT_TYPE.SNP;
else if (this.isInsertion() || this.isDeletion()) return VARIANT_TYPE.INDEL;
else if (this.isInsertion()) return VARIANT_TYPE.INSERTION;
else if (this.isDeletion()) return VARIANT_TYPE.DELETION;
else return VARIANT_TYPE.REFERENCE;
}

View File

@ -126,7 +126,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
@Override
public VARIANT_TYPE getType() {
if (this.isSNP()) return VARIANT_TYPE.SNP;
else if (this.isIndel()) return VARIANT_TYPE.INDEL;
else if (this.isInsertion()) return VARIANT_TYPE.INSERTION;
else if (this.isDeletion()) return VARIANT_TYPE.DELETION;
return VARIANT_TYPE.REFERENCE;
}

View File

@ -61,7 +61,7 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
/** @return the VARIANT_TYPE of the current variant */
@Override
public VARIANT_TYPE getType() {
return VARIANT_TYPE.INDEL;
return isInsertion() ? VARIANT_TYPE.INSERTION : VARIANT_TYPE.DELETION;
}
public boolean isSNP() { return false; }

View File

@ -82,8 +82,9 @@ public class rodPicardDbSNP implements VariationRod {
case SNP:
return VARIANT_TYPE.SNP;
case insertion:
return VARIANT_TYPE.INSERTION;
case deletion:
return VARIANT_TYPE.INDEL;
return VARIANT_TYPE.DELETION;
}
return null;
}
@ -131,7 +132,7 @@ public class rodPicardDbSNP implements VariationRod {
* @return true if we're an insertion or deletion
*/
public boolean isIndel() {
return getType() == VARIANT_TYPE.INDEL;
return getType() == VARIANT_TYPE.INSERTION || getType() == VARIANT_TYPE.DELETION;
}
public String getName() {

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;

View File

@ -48,7 +48,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
}
// generate the calls
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation());
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP);
if ( locusdata != null ) {
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
@ -77,9 +77,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
((SLODBacked)locusdata).setSLOD(strandScore);
}
if ( locusdata instanceof AlleleFrequencyBacked ) {
((AlleleFrequencyBacked)locusdata).setAlleleFrequency(overall.getMAF());
}
locusdata.setAlleleFrequency(overall.getMAF());
}
return new Pair<List<Genotype>, GenotypeLocusData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), locusdata);
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.Variation.VARIANT_TYPE;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -314,17 +315,13 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// next, the general locus data
// note that calculating strand bias involves overwriting data structures, so we do that last
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc);
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc, VARIANT_TYPE.SNP);
if ( locusdata != null ) {
locusdata.addAlternateAllele(bestAlternateAllele.toString());
locusdata.setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
}
if ( locusdata instanceof AlternateAlleleBacked ) {
((AlternateAlleleBacked)locusdata).setAlternateAllele(bestAlternateAllele);
}
if ( locusdata instanceof AlleleFrequencyBacked ) {
((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
}
if ( locusdata instanceof IDBacked ) {
rodDbSNP dbsnp = getDbSNP(tracker);
if ( dbsnp != null )

View File

@ -86,7 +86,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors());
}
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation());
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP);
if ( locusdata != null ) {
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);

View File

@ -1,23 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* Interface AlleleFrequencyBacked
*
* this interface indicates that the genotype is
* backed up by allele frequency information.
*/
public interface AlleleFrequencyBacked {
/**
*
* @return returns the best allele frequency for this genotype
*/
public double getAlleleFrequency();
/**
*
* @param frequency the allele frequency for this genotype
*/
public void setAlleleFrequency(double frequency);
}

View File

@ -1,24 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* Interface AlternateAlleleBacked
*
* this interface indicates that the genotype is
* backed up by alternate allele information.
*/
public interface AlternateAlleleBacked {
/**
*
* @return returns the alternate allele for this genotype
*/
public char getAlternateAllele();
/**
*
* @param alt the alternate allele base for this genotype
*/
public void setAlternateAllele(char alt);
}

View File

@ -65,7 +65,7 @@ public class BasicVariation implements Variation {
*/
@Override
public VARIANT_TYPE getType() {
if (mLength != 0) return VARIANT_TYPE.INDEL;
if (mLength != 0) return VARIANT_TYPE.INSERTION;
return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE;
}

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* @author ebanks
@ -10,19 +8,18 @@ import org.broadinstitute.sting.utils.GenomeLoc;
* <p/>
* represents the locus specific data associated with a genotype object.
*/
public interface GenotypeLocusData {
public interface GenotypeLocusData extends Variation {
/**
* get the reference base.
* @return a character, representing the reference base
*/
public char getReference();
/**
* get the genotype's location
*
* @return a GenomeLoc representing the location
* @param alt the alternate allele base for this genotype
*/
public GenomeLoc getLocation();
public void addAlternateAllele(String alt);
/**
*
* @param frequency the allele frequency for this genotype
*/
public void setAlleleFrequency(double frequency);
}

View File

@ -99,12 +99,13 @@ public class GenotypeWriterFactory {
* @param format the format
* @param ref the reference base
* @param loc the location
* @param type the variant type
* @return an unpopulated genotype locus data object
*/
public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) {
public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc, Variation.VARIANT_TYPE type) {
switch (format) {
case VCF:
return new VCFGenotypeLocusData(ref, loc);
return new VCFGenotypeLocusData(ref, loc, type);
case GELI:
case GELI_BINARY:
return null;

View File

@ -14,10 +14,12 @@ import java.util.List;
public interface Variation {
// the types of variants we currently allow
public enum VARIANT_TYPE {
SNP, INDEL, REFERENCE // though reference is not really a variant, we need to represent it
SNP, INSERTION, DELETION, REFERENCE // though reference is not really a variant, we need to represent it
}
/** are we bi-allelic? */
/**
* @return true if we are bi-allelic
*/
public boolean isBiallelic();
/**
@ -39,7 +41,7 @@ public interface Variation {
public VARIANT_TYPE getType();
/**
* are we a SNP? If not we're a Indel/deletion or the reference. This method must be call before you use
* are we a SNP? If not, we're an insertion, a deletion, or the reference. This method must be called before you use
* the convenience methods getAlternativeBaseForSNP or getReferenceForSNP, to ensure that you're working with a SNP
*
* @return true if we're a SNP
@ -127,7 +129,7 @@ public interface Variation {
/**
* gets the reference base in the case of a SNP. Throws an IllegalStateException if we're not a SNP
*
* @return a char, representing the alternate base
* @return a char, representing the reference base
*/
public char getReferenceForSNP();

View File

@ -3,8 +3,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.HashMap;
import java.util.Map;
import java.util.*;
/**
* @author ebanks
@ -13,7 +12,7 @@ import java.util.Map;
* <p/>
* represents the meta data for a genotype object.
*/
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, AlternateAlleleBacked, AlleleFrequencyBacked, ArbitraryFieldsBacked {
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, ArbitraryFieldsBacked {
// the discovery lod score
private double mConfidence = 0.0;
@ -29,7 +28,10 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
// the ref base and alt bases
private char mRefBase;
private char mAltBase = 'N';
private List<String> mAltBases = new ArrayList<String>();
// the variant type
private VARIANT_TYPE mType = VARIANT_TYPE.SNP;
// the id
private String mID;
@ -42,18 +44,20 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
*
* @param ref the reference base
* @param loc the locus
* @param type the variant type
*/
public VCFGenotypeLocusData(char ref, GenomeLoc loc) {
public VCFGenotypeLocusData(char ref, GenomeLoc loc, VARIANT_TYPE type) {
mRefBase = ref;
mLoc = loc;
mType = type;
}
/**
* get the reference base.
* @return a String containing the single reference base
*/
public char getReference() {
return mRefBase;
public String getReference() {
return String.valueOf(mRefBase);
}
/**
@ -65,20 +69,68 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
return mLoc;
}
/**
*
* @return returns the alternate allele for this genotype
*/
public char getAlternateAllele() {
return mAltBase;
public boolean isBiallelic() {
return mAltBases.size() == 1;
}
/**
*
* @param alt the alternate allele base for this genotype
*/
public void setAlternateAllele(char alt) {
mAltBase = alt;
/**
 * are we a SNP?
 *
 * @return true if the variant type stored for this locus is SNP
 */
public boolean isSNP() {
    return VARIANT_TYPE.SNP == mType;
}
/**
 * are we an insertion?
 *
 * @return true if the variant type stored for this locus is INSERTION
 */
public boolean isInsertion() {
    return VARIANT_TYPE.INSERTION == mType;
}
/**
 * are we an indel, i.e. either an insertion or a deletion?
 *
 * @return true if the variant type stored for this locus is INSERTION or DELETION
 */
public boolean isIndel() {
    final VARIANT_TYPE type = mType;
    return VARIANT_TYPE.INSERTION == type || VARIANT_TYPE.DELETION == type;
}
/**
 * are we a deletion?
 *
 * @return true if the variant type stored for this locus is DELETION
 */
public boolean isDeletion() {
    return VARIANT_TYPE.DELETION == mType;
}
/**
 * are we the reference (i.e. not really a variant)?
 *
 * @return true if the variant type stored for this locus is REFERENCE
 */
public boolean isReference() {
    return VARIANT_TYPE.REFERENCE == mType;
}
/**
 * get the variant type of this locus.
 *
 * @return the VARIANT_TYPE held by this object (SNP unless another type was assigned)
 */
public VARIANT_TYPE getType() {
    return this.mType;
}
/**
 * get the non-reference allele frequency.
 *
 * @return the allele frequency currently stored for this locus
 */
public double getNonRefAlleleFrequency() {
    return this.mAlleleFrequency;
}
/**
 * get the -log10 of the error probability.
 *
 * NOTE(review): this presumes the stored confidence is phred-scaled (callers appear to
 * pass a phred-scaled value to setConfidence) -- confirm.
 *
 * @return the stored confidence divided by 10
 */
public double getNegLog10PError() {
    final double confidence = mConfidence;
    return confidence / 10.0;
}
/**
 * get the alternate alleles, in the order in which they were added.
 *
 * Note that this hands back the internal list itself rather than a copy, so
 * modifications made by the caller are visible to this object.
 *
 * @return the list of alternate alleles; empty if none have been added
 */
public List<String> getAlternateAlleleList() {
    return this.mAltBases;
}
/**
 * append an alternate allele to the end of this locus' alternate allele list.
 *
 * @param alt the alternate allele base(s) for this genotype
 */
public void addAlternateAllele(String alt) {
    this.mAltBases.add(alt);
}
/**
 * get all of the alleles at this locus: the reference allele first, followed by
 * the alternate alleles in the order in which they were added.
 *
 * The returned list is a fresh copy; changing it does not affect this object.
 *
 * @return a new list holding the reference allele followed by the alternate alleles
 */
public List<String> getAlleleList() {
    // presized array-backed list: the final size is known up front, and callers get cheap random access
    List<String> alleles = new ArrayList<String>(mAltBases.size() + 1);
    alleles.add(getReference());
    alleles.addAll(mAltBases);
    return alleles;
}
/**
 * gets the alternate base in the case of a SNP: the first character of the first
 * alternate allele that was added.
 *
 * @return a char, representing the alternate base
 * @throws IllegalStateException if we're not a SNP, if no alternate alleles have been
 *                               set, or if the first alternate allele is null or empty
 */
public char getAlternativeBaseForSNP() {
    if ( !isSNP() )
        throw new IllegalStateException("This variant is not a SNP");
    if ( mAltBases.isEmpty() )
        throw new IllegalStateException("No alternate alleles have been set");
    final String firstAlt = mAltBases.get(0);
    // addAlternateAllele() accepts any String, so guard against null/"" here instead of
    // leaking a NullPointerException or StringIndexOutOfBoundsException from charAt(0)
    if ( firstAlt == null || firstAlt.length() == 0 )
        throw new IllegalStateException("The first alternate allele is empty");
    return firstAlt.charAt(0);
}
/**
 * gets the reference base in the case of a SNP.
 *
 * @return a char, representing the reference base
 * @throws IllegalStateException if we're not a SNP
 */
public char getReferenceForSNP() {
    if ( isSNP() )
        return mRefBase;
    throw new IllegalStateException("This variant is not a SNP");
}
/**
@ -115,15 +167,6 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
mSLOD = slod;
}
/**
* get the allele frequency
*
* @return the allele frequency
*/
public double getAlleleFrequency() {
return mAlleleFrequency;
}
/**
*
* @param frequency the allele frequency for this genotype

View File

@ -121,10 +121,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
if ( locusdata == null )
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry or have locusdata");
params.setLocations(locusdata.getLocation(), locusdata.getReference());
params.setLocations(locusdata.getLocation(), locusdata.getReference().charAt(0));
// if there is no genotype data, we'll also need to set an alternate allele
params.addAlternateBase(new VCFGenotypeEncoding(String.valueOf(((VCFGenotypeLocusData)locusdata).getAlternateAllele())));
if ( locusdata.isSNP() && locusdata.isBiallelic() )
params.addAlternateBase(new VCFGenotypeEncoding(locusdata.getAlternateAlleleList().get(0)));
} else {
params.setLocations(genotypes.get(0).getLocation(), genotypes.get(0).getReference());
}
@ -189,7 +190,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
if ( locusdata != null ) {
if ( locusdata.getSLOD() != null )
infoFields.put("SB", String.format("%.2f", locusdata.getSLOD()));
infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency()));
infoFields.put("AF", String.format("%.2f", locusdata.getNonRefAlleleFrequency()));
Map<String, String> otherFields = locusdata.getFields();
if ( otherFields != null ) {
infoFields.putAll(otherFields);