GenotypeLocusData now extends Variation.
Also, Variations should be INSERTIONs or DELETIONs (and not just INDELs). Technically, VCF records can be indels now. More changes coming. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2150 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8b30279edc
commit
e05cb346f3
|
|
@ -263,7 +263,8 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
if (this.isSNP()) return VARIANT_TYPE.SNP;
|
||||
else if (this.isInsertion() || this.isDeletion()) return VARIANT_TYPE.INDEL;
|
||||
else if (this.isInsertion()) return VARIANT_TYPE.INSERTION;
|
||||
else if (this.isDeletion()) return VARIANT_TYPE.DELETION;
|
||||
else return VARIANT_TYPE.REFERENCE;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -126,7 +126,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
if (this.isSNP()) return VARIANT_TYPE.SNP;
|
||||
else if (this.isIndel()) return VARIANT_TYPE.INDEL;
|
||||
else if (this.isInsertion()) return VARIANT_TYPE.INSERTION;
|
||||
else if (this.isDeletion()) return VARIANT_TYPE.DELETION;
|
||||
return VARIANT_TYPE.REFERENCE;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
/** @return the VARIANT_TYPE of the current variant */
|
||||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
return VARIANT_TYPE.INDEL;
|
||||
return isInsertion() ? VARIANT_TYPE.INSERTION : VARIANT_TYPE.DELETION;
|
||||
}
|
||||
|
||||
public boolean isSNP() { return false; }
|
||||
|
|
|
|||
|
|
@ -82,8 +82,9 @@ public class rodPicardDbSNP implements VariationRod {
|
|||
case SNP:
|
||||
return VARIANT_TYPE.SNP;
|
||||
case insertion:
|
||||
return VARIANT_TYPE.INSERTION;
|
||||
case deletion:
|
||||
return VARIANT_TYPE.INDEL;
|
||||
return VARIANT_TYPE.DELETION;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
@ -131,7 +132,7 @@ public class rodPicardDbSNP implements VariationRod {
|
|||
* @return true if we're an insertion or deletion
|
||||
*/
|
||||
public boolean isIndel() {
|
||||
return getType() == VARIANT_TYPE.INDEL;
|
||||
return getType() == VARIANT_TYPE.INSERTION || getType() == VARIANT_TYPE.DELETION;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
}
|
||||
|
||||
// generate the calls
|
||||
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP);
|
||||
if ( locusdata != null ) {
|
||||
if ( locusdata instanceof ConfidenceBacked ) {
|
||||
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
|
||||
|
|
@ -77,9 +77,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
|
||||
((SLODBacked)locusdata).setSLOD(strandScore);
|
||||
}
|
||||
if ( locusdata instanceof AlleleFrequencyBacked ) {
|
||||
((AlleleFrequencyBacked)locusdata).setAlleleFrequency(overall.getMAF());
|
||||
}
|
||||
locusdata.setAlleleFrequency(overall.getMAF());
|
||||
}
|
||||
return new Pair<List<Genotype>, GenotypeLocusData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), locusdata);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation.VARIANT_TYPE;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -314,17 +315,13 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
|
||||
// next, the general locus data
|
||||
// note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc);
|
||||
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc, VARIANT_TYPE.SNP);
|
||||
if ( locusdata != null ) {
|
||||
locusdata.addAlternateAllele(bestAlternateAllele.toString());
|
||||
locusdata.setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
|
||||
if ( locusdata instanceof ConfidenceBacked ) {
|
||||
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
|
||||
}
|
||||
if ( locusdata instanceof AlternateAlleleBacked ) {
|
||||
((AlternateAlleleBacked)locusdata).setAlternateAllele(bestAlternateAllele);
|
||||
}
|
||||
if ( locusdata instanceof AlleleFrequencyBacked ) {
|
||||
((AlleleFrequencyBacked)locusdata).setAlleleFrequency((double)bestAFguess / (double)(frequencyEstimationPoints-1));
|
||||
}
|
||||
if ( locusdata instanceof IDBacked ) {
|
||||
rodDbSNP dbsnp = getDbSNP(tracker);
|
||||
if ( dbsnp != null )
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors());
|
||||
}
|
||||
|
||||
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation(), Variation.VARIANT_TYPE.SNP);
|
||||
if ( locusdata != null ) {
|
||||
if ( locusdata instanceof ConfidenceBacked ) {
|
||||
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
|
||||
|
|
|
|||
|
|
@ -1,23 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* Interface AlleleFrequencyBacked
|
||||
*
|
||||
* this interface indicates that the genotype is
|
||||
* backed up by allele frequency information.
|
||||
*/
|
||||
public interface AlleleFrequencyBacked {
|
||||
|
||||
/**
|
||||
*
|
||||
* @return returns the best allele frequency for this genotype
|
||||
*/
|
||||
public double getAlleleFrequency();
|
||||
|
||||
/**
|
||||
*
|
||||
* @param frequency the allele frequency for this genotype
|
||||
*/
|
||||
public void setAlleleFrequency(double frequency);
|
||||
}
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* Interface AlternateAlleleBacked
|
||||
*
|
||||
* this interface indicates that the genotype is
|
||||
* backed up by alternate allele information.
|
||||
*/
|
||||
public interface AlternateAlleleBacked {
|
||||
|
||||
/**
|
||||
*
|
||||
* @return returns the alternate allele for this genotype
|
||||
*/
|
||||
public char getAlternateAllele();
|
||||
|
||||
/**
|
||||
*
|
||||
* @param alt the alternate allele base for this genotype
|
||||
*/
|
||||
public void setAlternateAllele(char alt);
|
||||
|
||||
}
|
||||
|
|
@ -65,7 +65,7 @@ public class BasicVariation implements Variation {
|
|||
*/
|
||||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
if (mLength != 0) return VARIANT_TYPE.INDEL;
|
||||
if (mLength != 0) return VARIANT_TYPE.INSERTION;
|
||||
return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
|
|
@ -10,19 +8,18 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
* <p/>
|
||||
* represents the locus specific data associated with a genotype object.
|
||||
*/
|
||||
public interface GenotypeLocusData {
|
||||
public interface GenotypeLocusData extends Variation {
|
||||
|
||||
/**
|
||||
* get the reference base.
|
||||
* @return a character, representing the reference base
|
||||
*/
|
||||
public char getReference();
|
||||
|
||||
/**
|
||||
* get the genotype's location
|
||||
*
|
||||
* @return a GenomeLoc representing the location
|
||||
* @param alt the alternate allele base for this genotype
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
public void addAlternateAllele(String alt);
|
||||
|
||||
/**
|
||||
*
|
||||
* @param frequency the allele frequency for this genotype
|
||||
*/
|
||||
public void setAlleleFrequency(double frequency);
|
||||
|
||||
}
|
||||
|
|
@ -99,12 +99,13 @@ public class GenotypeWriterFactory {
|
|||
* @param format the format
|
||||
* @param ref the reference base
|
||||
* @param loc the location
|
||||
* @param type the variant type
|
||||
* @return an unpopulated genotype locus data object
|
||||
*/
|
||||
public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) {
|
||||
public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc, Variation.VARIANT_TYPE type) {
|
||||
switch (format) {
|
||||
case VCF:
|
||||
return new VCFGenotypeLocusData(ref, loc);
|
||||
return new VCFGenotypeLocusData(ref, loc, type);
|
||||
case GELI:
|
||||
case GELI_BINARY:
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -14,10 +14,12 @@ import java.util.List;
|
|||
public interface Variation {
|
||||
// the types of variants we currently allow
|
||||
public enum VARIANT_TYPE {
|
||||
SNP, INDEL, REFERENCE // though reference is not really a variant, we need to represent it
|
||||
SNP, INSERTION, DELETION, REFERENCE // though reference is not really a variant, we need to represent it
|
||||
}
|
||||
|
||||
/** are we bi-allelic? */
|
||||
/**
|
||||
* @return true if we are bi-allelic?
|
||||
*/
|
||||
public boolean isBiallelic();
|
||||
|
||||
/**
|
||||
|
|
@ -39,7 +41,7 @@ public interface Variation {
|
|||
public VARIANT_TYPE getType();
|
||||
|
||||
/**
|
||||
* are we a SNP? If not we're a Indel/deletion or the reference. This method must be call before you use
|
||||
* are we a SNP? If not we're a Indel/deletion or the reference. This method must be called before you use
|
||||
* the convenience methods getAlternativeBaseForSNP or getReferenceForSNP, to ensure that you're working with a SNP
|
||||
*
|
||||
* @return true if we're a SNP
|
||||
|
|
@ -127,7 +129,7 @@ public interface Variation {
|
|||
/**
|
||||
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
* @return a char, representing the reference base
|
||||
*/
|
||||
public char getReferenceForSNP();
|
||||
|
||||
|
|
|
|||
|
|
@ -3,8 +3,7 @@ package org.broadinstitute.sting.utils.genotype.vcf;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
|
|
@ -13,7 +12,7 @@ import java.util.Map;
|
|||
* <p/>
|
||||
* represents the meta data for a genotype object.
|
||||
*/
|
||||
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, AlternateAlleleBacked, AlleleFrequencyBacked, ArbitraryFieldsBacked {
|
||||
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, IDBacked, ArbitraryFieldsBacked {
|
||||
|
||||
// the discovery lod score
|
||||
private double mConfidence = 0.0;
|
||||
|
|
@ -29,7 +28,10 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
|
|||
|
||||
// the ref base and alt bases
|
||||
private char mRefBase;
|
||||
private char mAltBase = 'N';
|
||||
private List<String> mAltBases = new ArrayList<String>();
|
||||
|
||||
// the variant type
|
||||
private VARIANT_TYPE mType = VARIANT_TYPE.SNP;
|
||||
|
||||
// the id
|
||||
private String mID;
|
||||
|
|
@ -42,18 +44,20 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
|
|||
*
|
||||
* @param ref the reference base
|
||||
* @param loc the locus
|
||||
* @param type the variant type
|
||||
*/
|
||||
public VCFGenotypeLocusData(char ref, GenomeLoc loc) {
|
||||
public VCFGenotypeLocusData(char ref, GenomeLoc loc, VARIANT_TYPE type) {
|
||||
mRefBase = ref;
|
||||
mLoc = loc;
|
||||
mType = type;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base.
|
||||
* @return a character, representing the reference base
|
||||
*/
|
||||
public char getReference() {
|
||||
return mRefBase;
|
||||
public String getReference() {
|
||||
return String.valueOf(mRefBase);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -65,20 +69,68 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
|
|||
return mLoc;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return returns the alternate allele for this genotype
|
||||
*/
|
||||
public char getAlternateAllele() {
|
||||
return mAltBase;
|
||||
public boolean isBiallelic() {
|
||||
return mAltBases.size() == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param alt the alternate allele base for this genotype
|
||||
*/
|
||||
public void setAlternateAllele(char alt) {
|
||||
mAltBase = alt;
|
||||
public boolean isSNP() {
|
||||
return mType == VARIANT_TYPE.SNP;
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return mType == VARIANT_TYPE.INSERTION;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return mType == VARIANT_TYPE.INSERTION || mType == VARIANT_TYPE.DELETION;
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return mType == VARIANT_TYPE.DELETION;
|
||||
}
|
||||
|
||||
public boolean isReference() {
|
||||
return mType == VARIANT_TYPE.REFERENCE;
|
||||
}
|
||||
|
||||
public VARIANT_TYPE getType() {
|
||||
return mType;
|
||||
}
|
||||
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return mAlleleFrequency;
|
||||
}
|
||||
|
||||
public double getNegLog10PError() {
|
||||
return mConfidence / 10.0;
|
||||
}
|
||||
|
||||
public List<String> getAlternateAlleleList() {
|
||||
return mAltBases;
|
||||
}
|
||||
|
||||
public void addAlternateAllele(String alt) {
|
||||
mAltBases.add(alt);
|
||||
}
|
||||
|
||||
public List<String> getAlleleList() {
|
||||
LinkedList<String> alleles = new LinkedList<String>(mAltBases);
|
||||
alleles.addFirst(getReference());
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if ( !isSNP() )
|
||||
throw new IllegalStateException("This variant is not a SNP");
|
||||
if ( mAltBases.size() == 0 )
|
||||
throw new IllegalStateException("No alternate alleles have been set");
|
||||
return mAltBases.get(0).charAt(0);
|
||||
}
|
||||
|
||||
public char getReferenceForSNP() {
|
||||
if ( !isSNP() )
|
||||
throw new IllegalStateException("This variant is not a SNP");
|
||||
return mRefBase;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -115,15 +167,6 @@ public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked
|
|||
mSLOD = slod;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the allele frequency
|
||||
*
|
||||
* @return the allele frequency
|
||||
*/
|
||||
public double getAlleleFrequency() {
|
||||
return mAlleleFrequency;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param frequency the allele frequency for this genotype
|
||||
|
|
|
|||
|
|
@ -121,10 +121,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
if ( locusdata == null )
|
||||
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry or have locusdata");
|
||||
|
||||
params.setLocations(locusdata.getLocation(), locusdata.getReference());
|
||||
params.setLocations(locusdata.getLocation(), locusdata.getReference().charAt(0));
|
||||
|
||||
// if there is no genotype data, we'll also need to set an alternate allele
|
||||
params.addAlternateBase(new VCFGenotypeEncoding(String.valueOf(((VCFGenotypeLocusData)locusdata).getAlternateAllele())));
|
||||
if ( locusdata.isSNP() && locusdata.isBiallelic() )
|
||||
params.addAlternateBase(new VCFGenotypeEncoding(locusdata.getAlternateAlleleList().get(0)));
|
||||
} else {
|
||||
params.setLocations(genotypes.get(0).getLocation(), genotypes.get(0).getReference());
|
||||
}
|
||||
|
|
@ -189,7 +190,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
if ( locusdata != null ) {
|
||||
if ( locusdata.getSLOD() != null )
|
||||
infoFields.put("SB", String.format("%.2f", locusdata.getSLOD()));
|
||||
infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency()));
|
||||
infoFields.put("AF", String.format("%.2f", locusdata.getNonRefAlleleFrequency()));
|
||||
Map<String, String> otherFields = locusdata.getFields();
|
||||
if ( otherFields != null ) {
|
||||
infoFields.putAll(otherFields);
|
||||
|
|
|
|||
Loading…
Reference in New Issue