Removing the AllelicVariant interface, which is replaced by the Variation interface.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1770 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-06 17:44:24 +00:00
parent c1e1d910cb
commit 3aec76136f
13 changed files with 257 additions and 183 deletions

View File

@ -1,148 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
/**
 * Describes an allelic variant located on the reference: its alleles, its type
 * (SNP, insertion, deletion), and the frequency and confidence values attached to it.
 *
 * Created by IntelliJ IDEA.
 * User: asivache
 * Date: Apr 1, 2009
 * Time: 11:02:03 AM
 */
public interface AllelicVariant extends ReferenceOrderedDatum {
// ----------------------------------------------------------------------
//
// manipulating the SNP information
//
// ----------------------------------------------------------------------
/**
 * Location of this variant on the reference (on the forward strand).
 *
 * @return the location of this variant on the reference
 */
GenomeLoc getLocation();
/**
 * Returns bases in the reference allele as a String. The string can be empty (as in an
 * insertion into the reference), can contain a single character (as in a SNP or a one-base
 * deletion), or multiple characters (for longer indels).
 *
 * @return reference allele, forward strand
 */
String getRefBasesFWD();
/**
 * Returns the reference (major) allele base for a SNP variant as a character.
 *
 * @return reference base on the forward strand
 * @throws IllegalStateException if the variant is not a SNP
 */
char getRefSnpFWD() throws IllegalStateException;
/**
 * Returns bases in the alternative allele as a String. The string can be empty (as in a
 * deletion from the reference), can contain a single character (as in a SNP or a one-base
 * insertion), or multiple characters (for longer indels).
 *
 * @return alternative allele, forward strand
 */
String getAltBasesFWD();
/**
 * Returns the alternative (minor) allele base for a SNP variant as a character.
 *
 * @return alternative allele base on the forward strand
 * @throws IllegalStateException if the variant is not a SNP
 */
char getAltSnpFWD() throws IllegalStateException;
/**
 * Returns true if all observed alleles are reference alleles. All {@code is<Variant>} methods
 * (where Variant = SNP, Insertion, etc.) should return false at such a site to ensure
 * consistency. This method is included for use with genotyping calls
 * ({@code isGenotype() == true}); it makes no sense for, e.g., dbSNP and should return false
 * for the latter.
 *
 * @return true if only reference alleles were observed
 */
boolean isReference();
/**
 * Is this variant a SNP?
 *
 * @return true or false
 */
boolean isSNP();
/**
 * Is this variant an insertion? The contract requires isIndel() to return true
 * if this method returns true.
 *
 * @return true or false
 */
boolean isInsertion();
/**
 * Is this variant a deletion? The contract requires isIndel() to return true
 * if isDeletion() returns true.
 *
 * @return true or false
 */
boolean isDeletion();
/**
 * Is this variant an insertion or a deletion? The contract requires
 * this to be true if either isInsertion() or isDeletion() returns true. However,
 * this method is currently allowed to return true even if neither of isInsertion()
 * and isDeletion() does.
 *
 * @return true if this variant is an indel
 */
boolean isIndel();
/**
 * Returns minor allele frequency.
 *
 * @return the minor allele frequency
 */
double getMAF() ;
/**
 * Returns heterozygosity, a more accessible general feature of a variant.
 *
 * @return the heterozygosity
 */
double getHeterozygosity() ;
/**
 * Is this variant an actual genotype (such as an individual call from sequencing, a HapMap
 * chip, etc.), or a population allelic variant (a call from pooled sequencing, a dbSNP site,
 * etc.)? Only if the variant is a genotype is there a meaningful question of, e.g., whether
 * it is a het or homozygous non-ref.
 *
 * @return true if this variant is an actual genotype.
 */
boolean isGenotype();
/**
 * Returns phred-mapped confidence in the variation event (e.g. MAQ's SNP confidence, or
 * AlleleCaller's best vs. ref).
 *
 * @return the phred-mapped confidence in the variation event
 */
double getVariationConfidence();
/**
 * Returns phred-mapped confidence in the called genotype (e.g. MAQ's consensus confidence, or
 * AlleleCaller's best vs. next-best).
 *
 * @return the phred-mapped confidence in the called genotype
 */
double getConsensusConfidence();
/**
 * Returns the actual observed genotype. Allowed to return more than two alleles
 * (see {@link #getPloidy()}).
 *
 * @return the observed genotype
 * @throws IllegalStateException if this variant is not a genotype
 */
List<String> getGenotype() throws IllegalStateException;
/**
 * Returns the actual number of observed alleles (chromosomes) in the genotype.
 *
 * @return the number of observed alleles
 * @throws IllegalStateException if this variant is not a genotype
 */
int getPloidy() throws IllegalStateException;
/**
 * Returns true if the site has at most two known or observed alleles (ref and non-ref), or
 * false if there are more than two allelic variants known or observed. When the implementing
 * class is a genotype, alleles should always be counted including the reference allele,
 * whether or not it was observed in the particular individual: i.e. if the reference is 'C',
 * then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is <i>not</i>
 * bi-allelic (since there are two allelic variants, 'A' and 'T', <i>in addition</i> to the
 * (known) reference variant 'C').
 *
 * @return true if the site is bi-allelic
 */
boolean isBiallelic();
/**
 * Returns the length of the variant. For SNPs this is just 1.
 *
 * @return the length of the variant
 */
int length();
}

View File

@ -37,7 +37,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class RodGeliText extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, AllelicVariant {
public class RodGeliText extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype {
public enum Genotype_Strings {
AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
}

View File

@ -18,7 +18,7 @@ import java.util.regex.Pattern;
* Time: 10:47:14 AM
* To change this template use File | Settings | File Templates.
*/
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements AllelicVariant, VariationRod, VariantBackedByGenotype {
public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype {
private String contig, source, feature, strand, frame;
private long start, stop;
private double score;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.List;
/**
@ -8,7 +10,7 @@ import java.util.List;
* chr1:1104841 C N 0.000000 -69.937928 -69.937928 0.000000 0.000000 324.000000 162 0 0
* chr1:1104842 A N 0.000000 -84.816002 -84.816002 0.000000 0.000000 324.000000 162 0 0
*/
public interface SNPCallFromGenotypes extends AllelicVariant {
public interface SNPCallFromGenotypes extends Variation {
public int nIndividuals();
public int nHomRefGenotypes();

View File

@ -1,10 +1,11 @@
package org.broadinstitute.sting.gatk.refdata;
import java.util.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Arrays;
import java.util.List;
public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
public SangerSNPROD(final String name) {
super(name);
@ -14,15 +15,71 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
loc = GenomeLocParser.createGenomeLoc(this.get("0"), Long.parseLong(this.get("1")));
return loc;
}
/**
 * Returns the reference allele at this position as a one-character string.
 * Only the first character of {@link #getRefBasesFWD()} is kept.
 *
 * @return the first reference base, as a string
 */
@Override
public String getReference() {
    final char firstRefBase = getRefBasesFWD().charAt(0);
    return String.valueOf(firstRefBase);
}
/** @return the reference bases, read from the value stored under key "2" */
public String getRefBasesFWD() {
    return this.get("2");
}

/** @return the first character of {@link #getRefBasesFWD()} */
public char getRefSnpFWD() throws IllegalStateException {
    final String refBases = getRefBasesFWD();
    return refBases.charAt(0);
}

/** @return the alternate bases, read from the value stored under key "3" */
public String getAltBasesFWD() {
    return this.get("3");
}

/** @return the first character of {@link #getAltBasesFWD()} */
public char getAltSnpFWD() throws IllegalStateException {
    final String altBases = getAltBasesFWD();
    return altBases.charAt(0);
}

/** @return true when the variation confidence is below 0.01 */
public boolean isReference() {
    final double confidence = getVariationConfidence();
    return confidence < 0.01;
}
/**
 * Returns the frequency of the non-reference allele by delegating to {@link #getMAF()}.
 *
 * @return whatever {@link #getMAF()} reports for this record
 */
@Override
public double getNonRefAlleleFrequency() {
    return getMAF();
}
/**
 * Classifies this record for callers that switch over the variation type.
 *
 * @return VARIANT_TYPE.REFERENCE when {@link #isReference()} is true, VARIANT_TYPE.SNP otherwise
 */
@Override
public VARIANT_TYPE getType() {
    return isReference() ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP;
}
/** @return true whenever this record is not a reference call */
public boolean isSNP() {
    return !isReference();
}

/** @return always false; this record type never reports an insertion */
public boolean isInsertion() {
    return false;
}

/** @return always false; this record type never reports a deletion */
public boolean isDeletion() {
    return false;
}

/** @return always false; this record type never reports an indel */
public boolean isIndel() {
    return false;
}
/**
 * Returns the alternate base of this SNP by delegating to {@link #getAltSnpFWD()}.
 *
 * @return a char, representing the alternate base
 */
@Override
public char getAlternativeBaseForSNP() {
    return getAltSnpFWD();
}
/**
 * Returns the reference base of this SNP by delegating to {@link #getRefSnpFWD()}.
 *
 * @return a char, representing the reference base
 */
@Override
public char getReferenceForSNP() {
    return getRefSnpFWD();
}
/** @return always -1 */
public double getMAF() {
    return -1;
}

/** @return always -1 */
public double getHeterozygosity() {
    return -1;
}

/** @return always false */
public boolean isGenotype() {
    return false;
}
@ -31,6 +88,37 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
/**
 * Not supported by this record type.
 *
 * @throws IllegalStateException always
 */
public List<String> getGenotype() throws IllegalStateException {
    throw new IllegalStateException();
}

/** @return always 2 */
public int getPloidy() throws IllegalStateException {
    return 2;
}

/** @return always true */
public boolean isBiallelic() {
    return true;
}
/**
 * Returns the -1 * log10 error estimate by delegating to getVariationConfidence().
 *
 * @return the value of getVariationConfidence()
 */
@Override
public double getNegLog10PError() {
    return getVariationConfidence();
}
/**
 * Returns the alternate bases; intended for the biallelic case.
 *
 * @return the value stored under key "3"
 */
@Override
public String getAlternateBases() {
    final String alternateBases = this.get("3");
    return alternateBases;
}
/**
 * Returns the alternate bases as a list; intended for sites with more than two alleles.
 *
 * @return a one-element list holding the value stored under key "3"
 */
@Override
public List<String> getAlternateBaseList() {
    final String alternateAllele = this.get("3");
    return Arrays.asList(alternateAllele);
}
/** @return always 1 */
public int length() {
    return 1;
}
// SNPCallFromGenotypes interface

View File

@ -2,10 +2,13 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.*;
import java.util.Arrays;
import java.util.List;
public class SequenomROD extends TabularROD implements AllelicVariant {
public class SequenomROD extends TabularROD implements Variation {
public SequenomROD(String name) {
super(name);
@ -16,29 +19,102 @@ public class SequenomROD extends TabularROD implements AllelicVariant {
return GenomeLocParser.createGenomeLoc(this.get("0"), pos);
}
/**
 * Returns the reference base(s) at this position. This implementation always
 * reports an empty string.
 *
 * @return the empty string
 */
@Override
public String getReference() {
    return "";
}
/** @return a one-element list wrapping the value stored under key "2" */
public List<String> getFWDAlleles() {
return Arrays.asList(this.get("2"));
}
// The reference allele accessors below always return an empty string or 'N'.
/** @return always the empty string */
public String getFWDRefBases() { return ""; }
/** @return the first (and only) entry of {@link #getFWDAlleles()} */
public String getAltBasesFWD() { return getFWDAlleles().get(0); }
/** @return always the empty string */
public String getRefBasesFWD() { return ""; }
/** @return always 'N' */
public char getRefSnpFWD() { return 'N'; }
/** @return the first character of {@link #getAltBasesFWD()} */
public char getAltSnpFWD() { return getAltBasesFWD().charAt(0); }
/** @return always 'N' */
public char getRef() { return 'N'; }
/** @return the same list as {@link #getFWDAlleles()} */
public List<String> getGenotype() { return getFWDAlleles(); }
/** @return always false */
public boolean isGenotype() { return false; }
/** @return always true */
public boolean isPointGenotype() { return true; }
/** @return always false */
public boolean isIndelGenotype() { return false; }
/**
 * Returns the frequency of the non-reference allele. This implementation
 * always reports 0.0.
 *
 * @return 0.0
 */
@Override
public double getNonRefAlleleFrequency() {
    return 0.0;
}
/**
 * Classifies this record for callers that switch over the variation type.
 * Every record of this class is reported as a SNP.
 *
 * @return VARIANT_TYPE.SNP
 */
@Override
public VARIANT_TYPE getType() {
    return VARIANT_TYPE.SNP;
}
// Fixed classification: every record is reported as a SNP and never as
// reference, insertion, deletion or indel.
public boolean isSNP() { return true; }
public boolean isReference() { return false; }
public boolean isInsertion() { return false; }
public boolean isDeletion() { return false; }
public boolean isIndel() { return false; }
// All confidence accessors below return the constant 0.0.
public double getVariantConfidence() { return 0.0; }
public double getVariationConfidence() { return 0.0; }
public double getConsensusConfidence() { return 0.0; }
/**
 * Returns the alternate base of this SNP: the first character of the
 * alternate allele reported by {@link #getAltBasesFWD()}.
 *
 * @return a char, representing the alternate base
 */
@Override
public char getAlternativeBaseForSNP() {
    // The two SNP accessors were swapped: the alternate base must come from the
    // alternate allele, not from the 'N' placeholder used for the reference.
    return getAltBasesFWD().charAt(0);
}

/**
 * Returns the reference base of this SNP. This class reports its reference
 * base as the placeholder 'N'.
 *
 * @return a char, representing the reference base; always 'N'
 */
@Override
public char getReferenceForSNP() {
    return 'N';
}
/** @return always true */
public boolean isBiallelic() {
    return true;
}
/**
 * Returns the -1 * log10 error estimate. This implementation always reports 0.0.
 *
 * @return 0.0
 */
@Override
public double getNegLog10PError() {
    return 0.0;
}
/**
 * Returns the alternate bases; intended for the biallelic case.
 *
 * @return the value of {@link #getAltBasesFWD()}
 */
@Override
public String getAlternateBases() {
    final String alternateBases = getAltBasesFWD();
    return alternateBases;
}
/**
 * Returns the alternate bases as a list.
 *
 * {@link #isBiallelic()} is always true for this class, so the list holds the
 * single alternate allele. Rejecting the call with a "not biallelic" error
 * contradicted {@link #isBiallelic()} and broke callers that treat every
 * variation uniformly through the list accessor.
 *
 * @return a one-element list holding the value of {@link #getAltBasesFWD()}
 */
@Override
public List<String> getAlternateBaseList() {
    return Arrays.asList(getAltBasesFWD());
}
/** @return always false */
public boolean isHom() {
    return false;
}

/** @return always false */
public boolean isHet() {
    return false;
}

/** @return always 0.0 */
public double getHeterozygosity() {
    return 0.0;
}

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Arrays;
import java.util.List;
public class SimpleIndelROD extends TabularROD implements Genotype, AllelicVariant, VariationRod {
public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod {
private boolean KGENOMES_FORMAT = false, checkedFormat = false;

View File

@ -20,7 +20,7 @@ import java.util.List;
* Time: 10:47:14 AM
* To change this template use File | Settings | File Templates.
*/
public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, AllelicVariant {
public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype {
public GenomeLoc loc; // genome location of SNP
// Reference sequence chromosome or scaffold
// Start and stop positions in chrom

View File

@ -2,8 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariationRod;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -11,7 +11,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.Variation;
@WalkerName("SNPClusters")
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="snps",type=AllelicVariant.class)})
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="snps",type= VariationRod.class)})
public class SNPClusterWalker extends RefWalker<GenomeLoc, GenomeLoc> {
@Argument(fullName="windowSize", shortName="window", doc="window size for calculating clusters", required=false)
int windowSize = 10;

View File

@ -1,20 +1,20 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import java.util.List;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.Genotype;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.Genotype;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.playground.utils.GenotypingCallStats;
import org.broadinstitute.sting.playground.utils.TrioConcordanceRecord;
import org.broadinstitute.sting.utils.GenotypeUtils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.List;
//@Requires(value=DataSource.REFERENCE,referenceMetaData={@RMD(name="mother",type=rodSAMPileup.class),
// @RMD(name="father",type=rodSAMPileup.class),
// @RMD(name="daughter",type=rodSAMPileup.class)})

View File

@ -83,7 +83,7 @@ public class HardyWeinbergEquilibrium extends BasicVariantAnalysis implements Po
if ( false ) {
System.out.printf("HWE-violation at %s %f < %f %1.2f %5d %5d %5d %5d %5d %.2e %.2e %.2e => %.6e [%s]%n",
call.getLocation(), p, threshold, call.getMAF(), nAA, nAa, naa, nA, n, m, b, tosses, p, call);
call.getLocation(), p, threshold, call.getNonRefAlleleFrequency(), nAA, nAa, naa, nA, n, m, b, tosses, p, call);
System.out.printf("(factorial(%d) / (factorial(%d) * factorial(%d) * factorial(%d))) / choose(%d, %d) * 2^%d - %f < 1e-3%n",
nAA + nAa + naa, nAA, nAa, naa, 2 * n, nA, nAa, p);
}

View File

@ -1,12 +1,12 @@
package org.broadinstitute.sting.utils;
import java.util.Arrays;
import java.util.List;
import org.broadinstitute.sting.gatk.refdata.Genotype;
import org.broadinstitute.sting.gatk.refdata.GenotypeList;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.Arrays;
import java.util.List;
/** Holds useful utility methods and auxiliary default classes for working with Genotype objects
*
@ -77,15 +77,19 @@ public class GenotypeUtils {
else throw new StingException("track "+rod.getName()+" is not a Genotype or GenotypeList");
}
public static boolean isHet(AllelicVariant var) {
public static boolean isHet(Variation var) {
if ( var instanceof Genotype )
return ((Genotype)var).isHet();
List<String> genotype = var.getGenotype();
if ( genotype.size() < 1 )
String genotype = var.getAlternateBases();
if ( genotype.length() < 1 )
return false;
return genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
char first = genotype.charAt(0);
for (char base : genotype.toCharArray()) {
if (base != first) return true;
}
return false;
}
/** This class represents a "default" indel-type genotype with homozygous reference (i.e. confidently no indel)

View File

@ -0,0 +1,52 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.genotype.BasicVariation;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
/**
 * Unit tests for the helper functions in the GenotypeUtils class.
 *
 * @author aaron
 */
public class GenotypeUtilsTest extends BaseTest {
    private static IndexedFastaSequenceFile seq;
    private static File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf");

    /**
     * Opens the reference FASTA once for the whole class and hands it to
     * GenomeLocParser so that genome locations can be created in the tests.
     */
    @BeforeClass
    public static void beforeTests() {
        final File referenceFasta = new File("/broad/1KG/reference/human_b36_both.fasta");
        try {
            seq = new IndexedFastaSequenceFile(referenceFasta);
        } catch (FileNotFoundException e) {
            throw new StingException("unable to load the sequence dictionary");
        }
        GenomeLocParser.setupRefContigOrdering(seq);
    }

    /**
     * Checks that isHet() reports a het only when the bases passed as the first
     * constructor argument differ from one another.
     */
    @Test
    public void isHetTest() {
        final GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 2);

        final BasicVariation basesAA = new BasicVariation("AA", "A", 0, loc, 0.0);
        Assert.assertFalse(GenotypeUtils.isHet(basesAA));

        final BasicVariation basesAG = new BasicVariation("AG", "A", 0, loc, 0.0);
        Assert.assertTrue(GenotypeUtils.isHet(basesAG));

        final BasicVariation basesGG = new BasicVariation("GG", "A", 0, loc, 0.0);
        Assert.assertFalse(GenotypeUtils.isHet(basesGG));
    }
}