Intermediate commit for some changes to the Variation system, so Eric can go ahead with his changes. Everything is pretty much set, but the Variation interface could still use a convenience method that joins all the alternate alleles.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1903 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-23 06:31:15 +00:00
parent 6c338eccb8
commit ad1fc511b1
23 changed files with 585 additions and 508 deletions

View File

@ -2,8 +2,9 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.ArrayList; import java.util.Arrays;
import java.util.List; import java.util.List;
/** /**
* loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom * loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom
* chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0 * chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0
@ -23,28 +24,6 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes,
public char getAltSnpFWD() throws IllegalStateException { return getAltBasesFWD().charAt(0); } public char getAltSnpFWD() throws IllegalStateException { return getAltBasesFWD().charAt(0); }
public boolean isReference() { return getVariationConfidence() < 0.01; } public boolean isReference() { return getVariationConfidence() < 0.01; }
/**
* gets the alternate base. Use this method if we're biallelic
*
* @return
*/
@Override
public String getAlternateBases() {
return getAltBasesFWD();
}
/**
* gets the alternate bases. Use this method if the allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> str = new ArrayList<String>();
str.add(this.getAltBasesFWD());
return str;
}
/** /**
* get the frequency of this variant * get the frequency of this variant
* *
@ -120,6 +99,33 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes,
return this.getVariationConfidence(); return this.getVariationConfidence();
} }
/**
 * Gets the alternate alleles at this location, NOT including the reference base.
 * This implementation reports exactly one entry: the string returned by
 * {@code getAltBasesFWD()}. Callers should not rely on any particular ordering of
 * alternate alleles (the first entry is not necessarily the most frequent allele).
 *
 * @return a fixed-size, single-element list holding the forward-strand alternate bases
 */
@Override
public List<String> getAlternateAlleleList() {
    final String alternateBases = getAltBasesFWD();
    return Arrays.asList(alternateBases);
}
/**
 * Gets all the alleles present at this location, including the reference base.
 * The reference allele is always the first element, followed by the alternate
 * alleles in whatever order {@code getAlternateAlleleList()} reports them.
 *
 * @return a new, modifiable list: the reference allele followed by the alternate alleles
 */
@Override
public List<String> getAlleleList() {
    // Arrays.asList(...) produces a fixed-size list whose addAll() throws
    // UnsupportedOperationException, so collect into a growable list instead.
    // (Fully qualified because this file no longer imports java.util.ArrayList.)
    final List<String> alleles = new java.util.ArrayList<String>();
    alleles.add(this.getReference());
    alleles.addAll(getAlternateAlleleList());
    return alleles;
}
public int length() { return 1; } public int length() { return 1; }
// SNPCallFromGenotypes interface // SNPCallFromGenotypes interface

View File

@ -161,10 +161,9 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
@Override @Override
public char getAlternativeBaseForSNP() { public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP"); if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference().charAt(0)) List<String> alleles = this.getAlternateAlleleList();
return getAlternateBases().charAt(1); if (alleles.size() != 1) throw new StingException("We're not biAllelic()");
return getAlternateBases().charAt(0); return Utils.stringToChar(alleles.get(0));
} }
/** /**
@ -175,10 +174,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
@Override @Override
public char getReferenceForSNP() { public char getReferenceForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP"); if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference().charAt(0)) return Utils.stringToChar(getReference());
return getAlternateBases().charAt(0);
return getAlternateBases().charAt(1);
} }
/** /**
@ -207,7 +203,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
* Get the nth best genotype (one based), i.e. to get the best genotype pass in 1, * Get the nth best genotype (one based), i.e. to get the best genotype pass in 1,
* the second best 2, etdc. * the second best 2, etdc.
* *
* @param nthBest the nth best genotype to get * @param nthBest the nth best genotype to get (1 based, NOT ZERO BASED)
* *
* @return a GENOTYPE object representing the nth best genotype * @return a GENOTYPE object representing the nth best genotype
*/ */
@ -253,28 +249,6 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
((VariableLengthCall) mRecord).getIndelLen1() < 0); ((VariableLengthCall) mRecord).getIndelLen1() < 0);
} }
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return this.getBestGenotype(1).toString();
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
/** /**
* Returns minor allele frequency. * Returns minor allele frequency.
* *
@ -310,6 +284,42 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR; return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
} }
/**
 * Gets the alternate alleles at this location, NOT including the reference base.
 * The alleles are taken from the bases of the best genotype ({@code getBestGenotype(1)}):
 * every base that differs from the reference is reported once. No ordering of the
 * alternate alleles is guaranteed beyond their order of appearance in the genotype string.
 *
 * @return a new list of distinct alternate alleles; empty when the best genotype is
 *         made up solely of the reference base
 */
@Override
public List<String> getAlternateAlleleList() {
    final LikelihoodObject.GENOTYPE genotype = getBestGenotype(1);
    final String reference = this.getReference();
    final List<String> alternates = new ArrayList<String>();
    for (char base : genotype.toString().toCharArray()) {
        final String allele = String.valueOf(base);
        // Skip alleles already collected: a homozygous non-reference genotype such as
        // "TT" carries one alternate allele, not two, and getAlternativeBaseForSNP()
        // relies on a biallelic site producing exactly one entry here.
        if (!allele.equals(reference) && !alternates.contains(allele)) {
            alternates.add(allele);
        }
    }
    return alternates;
}
/**
 * Gets the alleles found in the best genotype ({@code getBestGenotype(1)}).
 * When the genotype contains the reference base, the reference is the first element;
 * it is followed by each distinct non-reference base of the genotype, in order of
 * appearance. Note that the reference is only listed when the best genotype carries it.
 *
 * @return a new list of distinct alleles, reference first when present
 */
@Override
public List<String> getAlleleList() {
    final String genotype = getBestGenotype(1).toString();
    final String reference = this.getReference();
    final List<String> alleles = new ArrayList<String>();
    if (genotype.contains(reference)) {
        alleles.add(reference);
    }
    for (char base : genotype.toCharArray()) {
        final String allele = String.valueOf(base);
        // Report each non-reference allele once, so a homozygous non-reference
        // genotype such as "TT" does not list the same allele twice.
        if (base != Utils.stringToChar(reference) && !alleles.contains(allele)) {
            alleles.add(allele);
        }
    }
    return alleles;
}
public int length() { public int length() {
return 1; return 1;
} }

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype; import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
@ -134,6 +135,40 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
return Math.abs(lodBtr); return Math.abs(lodBtr);
} }
/**
 * Gets the alternate alleles at this location, NOT including the reference base.
 * The alleles are the bases of {@code bestGenotype} that differ from {@code refBase};
 * each one is reported once, in order of appearance in the genotype string.
 *
 * @return a new list of distinct alternate alleles; empty when the best genotype
 *         consists only of the reference base
 */
@Override
public List<String> getAlternateAlleleList() {
    final List<String> alternates = new ArrayList<String>();
    for (char base : bestGenotype.toCharArray()) {
        final String allele = String.valueOf(base);
        // A homozygous non-reference genotype such as "TT" has a single alternate
        // allele; without the contains() check it would be listed twice.
        if (base != refBase && !alternates.contains(allele)) {
            alternates.add(allele);
        }
    }
    return alternates;
}
/**
 * Gets the alleles found in {@code bestGenotype}. When the genotype contains the
 * reference, the reference is the first element; it is followed by each distinct
 * non-reference base of the genotype, in order of appearance. Note that the reference
 * is only listed when the best genotype carries it.
 *
 * @return a new list of distinct alleles, reference first when present
 */
@Override
public List<String> getAlleleList() {
    final String reference = getReference();
    final List<String> alleles = new ArrayList<String>();
    if (this.bestGenotype.contains(reference)) {
        alleles.add(reference);
    }
    for (char base : this.bestGenotype.toCharArray()) {
        final String allele = String.valueOf(base);
        // Report each non-reference allele once, so a homozygous non-reference
        // genotype such as "TT" does not list the same allele twice.
        if (base != Utils.stringToChar(reference) && !alleles.contains(allele)) {
            alleles.add(allele);
        }
    }
    return alleles;
}
public String getRefBasesFWD() { public String getRefBasesFWD() {
return String.format("%c", getRefSnpFWD()); return String.format("%c", getRefSnpFWD());
} }
@ -147,7 +182,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
} }
public char getAltSnpFWD() throws IllegalStateException { public char getAltSnpFWD() throws IllegalStateException {
// both ref and bestGenotype have been uppercased, so it's safe to use == // both ref and bestGenotype have been uppercased, so it's safe to use ==
char c = (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0); char c = (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
//System.out.printf("%s : %c and %c%n", bestGenotype, refBase, c); //System.out.printf("%s : %c and %c%n", bestGenotype, refBase, c);
return c; return c;
@ -187,28 +222,6 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
return false; return false;
} }
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return this.bestGenotype;
}
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
public boolean isIndel() { public boolean isIndel() {
return false; return false;
} }

View File

@ -2,10 +2,13 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import java.util.*; import java.util.*;
import java.util.regex.MatchResult; import java.util.regex.MatchResult;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -96,6 +99,34 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
return 4; // 1/10000 error return 4; // 1/10000 error
} }
/**
 * Not supported by this ROD: the reference is unknown here, so the alternate alleles
 * cannot be told apart from the reference allele. Every call fails.
 *
 * @return never returns normally
 * @throws StingException always
 */
@Override
public List<String> getAlternateAlleleList() {
    final String reason = "Hapmap is unable to provide an alternate allele list; the reference is unknown";
    throw new StingException(reason);
}
/**
 * Gets the alleles at this location by splitting {@code feature} into its individual
 * characters, one list entry per character, in the order they occur in the string.
 * No entry is singled out as the reference allele by this method.
 *
 * @return a new list holding one single-character string per character of {@code feature}
 */
@Override
public List<String> getAlleleList() {
    final char[] bases = feature.toCharArray();
    final List<String> alleles = new ArrayList<String>();
    for (int i = 0; i < bases.length; i++) {
        alleles.add(String.valueOf(bases[i]));
    }
    return alleles;
}
public String getAttribute(final String key) { public String getAttribute(final String key) {
return attributes.get(key); return attributes.get(key);
} }
@ -181,28 +212,6 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
public char getAltSnpFWD() throws IllegalStateException { return 0; } public char getAltSnpFWD() throws IllegalStateException { return 0; }
public boolean isReference() { return ! isSNP(); } public boolean isReference() { return ! isSNP(); }
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public String getAlternateBases() {
return this.feature;
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
/** /**
* get the frequency of this variant * get the frequency of this variant
* *

View File

@ -183,7 +183,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
/** are we bi-allelic? */ /** are we bi-allelic? */
@Override @Override
public boolean isBiallelic() { public boolean isBiallelic() {
return (this.getAlternateBaseList().size() == 1); return (this.getAlternateAlleleList().size() == 1);
} }
/** /**
@ -197,6 +197,37 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
return mCurrentRecord.getQual() / 10.0; return mCurrentRecord.getQual() / 10.0;
} }
/**
 * Gets the alternate alleles of the current VCF record, NOT including the reference
 * base. Each alternate-allele encoding of the record is converted with
 * {@code toString()}; the list follows the order in which the record reports them,
 * which callers should not read as a frequency ranking.
 *
 * @return a new list with the string form of every alternate allele in the record
 */
@Override
public List<String> getAlternateAlleleList() {
    final List<String> alternates = new ArrayList<String>();
    for (VCFGenotypeEncoding encoding : mCurrentRecord.getAlternateAlleles()) {
        final String allele = encoding.toString();
        alternates.add(allele);
    }
    return alternates;
}
/**
 * Gets all the alleles of the current VCF record, including the reference base.
 * The reference base of the record is always the first element, followed by the
 * alternate alleles in the order {@code getAlternateAlleleList()} returns them.
 *
 * @return a new list: the reference allele followed by the alternate alleles
 */
@Override
public List<String> getAlleleList() {
    final String reference = String.valueOf(mCurrentRecord.getReferenceBase());
    final List<String> alleles = new ArrayList<String>();
    alleles.add(reference);
    for (String alternate : getAlternateAlleleList()) {
        alleles.add(alternate);
    }
    return alleles;
}
/** /**
* are we truely a variant, given a reference * are we truely a variant, given a reference
* *
@ -207,31 +238,6 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
return (!mCurrentRecord.hasAlternateAllele()); return (!mCurrentRecord.hasAlternateAllele());
} }
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public String getAlternateBases() {
if (!this.isBiallelic())
throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBaseList instead");
return this.mCurrentRecord.getAlternateAlleles().get(0).toString();
}
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
for (VCFGenotypeEncoding enc : mCurrentRecord.getAlternateAlleles())
list.add(enc.toString());
return list;
}
/** /**
* are we an insertion or a deletion? yes, then return true. No? Well, false then. * are we an insertion or a deletion? yes, then return true. No? Well, false then.
* *

View File

@ -1,9 +1,10 @@
package org.broadinstitute.sting.gatk.refdata; package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List; import java.util.List;
public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes { public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
@ -100,23 +101,35 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
} }
/** /**
* gets the alternate base. Use this method if we're biallelic * gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guarantee ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
* *
* @return * @return an alternate allele list
*/ */
@Override @Override
public String getAlternateBases() { public List<String> getAlternateAlleleList() {
return this.get("3"); List<String> ret = new ArrayList<String>();
for (char c : get("3").toCharArray())
ret.add(String.valueOf(c));
return ret;
} }
/** /**
* gets the alternate bases. Use this method if the allele count is greater then 2 (not biallelic) * gets the alleles. This method should return all the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles.
* *
* @return * @return an alternate allele list
*/ */
@Override @Override
public List<String> getAlternateBaseList() { public List<String> getAlleleList() {
return Arrays.asList(this.get("3")); List<String> ret = new ArrayList<String>();
ret.add(this.getReference());
for (char c : get("3").toCharArray())
ret.add(String.valueOf(c));
return ret;
} }
public int length() { return 1; } public int length() { return 1; }

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@ -96,23 +97,31 @@ public class SequenomROD extends TabularROD implements Variation {
} }
/** /**
* gets the alternate bases. Use this method if we're biallelic * gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guarantee ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
* *
* @return * @return an alternate allele list
*/ */
@Override @Override
public String getAlternateBases() { public List<String> getAlternateAlleleList() {
return getAltBasesFWD(); List<String> ret = new ArrayList<String>();
for (char c: getAltBasesFWD().toCharArray())
ret.add(String.valueOf(c));
return ret;
} }
/** /**
* gets the alternate bases. Use this method if the allele count is greater then 2 (not biallelic) * gets the alleles. This method should return all the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles.
* *
* @return * @return an alternate allele list
*/ */
@Override @Override
public List<String> getAlternateBaseList() { public List<String> getAlleleList() {
throw new StingException("SequenomRod is not biallelic"); throw new StingException("SequenomRod doesn't know of the reference, and can't generate allele lists");
} }
public boolean isHom() { return false; } public boolean isHom() { return false; }

View File

@ -67,26 +67,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
public boolean isSNP() { return false; } public boolean isSNP() { return false; }
public boolean isReference() { return false; } public boolean isReference() { return false; }
/**
* gets the alternate base. Use this method if we're biallelic
*
* @return
*/
@Override
public String getAlternateBases() {
return getFWDAlleles().get(0);
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
return getFWDAlleles();
}
public boolean isInsertion() { public boolean isInsertion() {
if ( is1KGFormat() ) if ( is1KGFormat() )
return this.get("3").equals("I"); return this.get("3").equals("I");
@ -135,6 +115,35 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
return getVariationConfidence(); return getVariationConfidence();
} }
/**
 * Gets the alternate alleles at this location, NOT including the reference.
 * The result is every entry of {@code getAlleleList()} that is not equal to
 * {@code getReference()}, in the same relative order.
 *
 * @return a new, independent list of the non-reference alleles
 */
@Override
public List<String> getAlternateAlleleList() {
    final String reference = this.getReference();
    // Filter into a fresh list: removing entries from the list while iterating over it
    // with for-each throws ConcurrentModificationException, and would also modify the
    // list handed out by getAlleleList().
    // (Fully qualified because the import list of this file is not visible here.)
    final List<String> alternates = new java.util.ArrayList<String>();
    for (String allele : getAlleleList()) {
        if (!allele.equals(reference)) {
            alternates.add(allele);
        }
    }
    return alternates;
}
/**
 * Gets the alleles at this location. This is a direct pass-through of
 * {@code getFWDAlleles()}; the content and ordering of the list (including where the
 * reference allele sits) are whatever that method produces.
 *
 * @return the list returned by {@code getFWDAlleles()}
 */
@Override
public List<String> getAlleleList() {
    final List<String> forwardAlleles = this.getFWDAlleles();
    return forwardAlleles;
}
public boolean isHom() { return false; } public boolean isHom() { return false; }
public boolean isHet() { return false; } public boolean isHet() { return false; }
public double getHeterozygosity() { return 0.0; } public double getHeterozygosity() { return 0.0; }

View File

@ -4,10 +4,12 @@ import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.BasicGenotype; import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.List; import java.util.List;
/** /**
@ -22,8 +24,8 @@ import java.util.List;
*/ */
public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype { public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype {
public GenomeLoc loc; // genome location of SNP public GenomeLoc loc; // genome location of SNP
// Reference sequence chromosome or scaffold // Reference sequence chromosome or scaffold
// Start and stop positions in chrom // Start and stop positions in chrom
public String name; // Reference SNP identifier or Affy SNP name public String name; // Reference SNP identifier or Affy SNP name
public String strand; // Which DNA strand contains the observed alleles public String strand; // Which DNA strand contains the observed alleles
@ -33,18 +35,18 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
public String molType; // Sample type from exemplar ss public String molType; // Sample type from exemplar ss
public String varType; // The class of variant (simple, insertion, deletion, range, etc.) public String varType; // The class of variant (simple, insertion, deletion, range, etc.)
// Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion' // Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion'
public String validationStatus; // The validation status of the SNP public String validationStatus; // The validation status of the SNP
// one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap') // one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap')
public double avHet; // The average heterozygosity from all observations public double avHet; // The average heterozygosity from all observations
public double avHetSE; // The Standard Error for the average heterozygosity public double avHetSE; // The Standard Error for the average heterozygosity
public String func; // The functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.) public String func; // The functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.)
// set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5', // set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5',
// 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5') // 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5')
public String locType; // How the variant affects the reference sequence public String locType; // How the variant affects the reference sequence
// enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion') // enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion')
public int weight; // The quality of the alignment public int weight; // The quality of the alignment
@ -73,7 +75,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/ */
@Override @Override
public String getReference() { public String getReference() {
return getRefBasesFWD(); return refBases;
} }
/** /**
@ -86,58 +88,43 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
return 4; // -log10(0.0001) return 4; // -log10(0.0001)
} }
/**
 * Gets the alternate alleles at this location, NOT including the reference.
 * The result is every entry of {@code getAlleleList()} that is not equal to
 * {@code getReference()}, in the same relative order. No ordering by allele
 * frequency is implied.
 *
 * @return a new, modifiable list of the non-reference alleles
 */
@Override
public List<String> getAlternateAlleleList() {
    final String reference = getReference();
    // getAlleleList() hands back a fixed-size Arrays.asList view, so calling remove()
    // on it throws UnsupportedOperationException (and removing inside a for-each would
    // break the iteration anyway). Copy the non-reference alleles into a new list.
    final List<String> alternates = new ArrayList<String>();
    for (String allele : getAlleleList()) {
        if (!allele.equals(reference)) {
            alternates.add(allele);
        }
    }
    return alternates;
}
/**
 * Gets the alleles at this location, as listed in the {@code observed} field.
 * The field is split on "/"; when the record is not on the forward strand the
 * whole {@code observed} string is reverse-complemented before splitting. If the
 * reference allele occurs in the list but not at the front, it is swapped into
 * the first position so that the reference comes first.
 *
 * @return a fixed-size list of alleles, with the reference first whenever it is present
 */
public List<String> getAlleleList() {
    final String forwardObserved = onFwdStrand() ? observed : SequenceUtil.reverseComplement(observed);
    final List<String> alleles = Arrays.asList(forwardObserved.split("/"));
    // indexOf() > 0 means: the reference is present and is not already the first entry
    final int referenceIndex = alleles.indexOf(getReference());
    if (referenceIndex > 0) {
        Collections.swap(alleles, referenceIndex, 0);
    }
    return alleles;
}
public boolean onFwdStrand() { public boolean onFwdStrand() {
return strand.equals("+"); return strand.equals("+");
} }
/**
* Returns bases in the reference allele as a String. String can be empty (as in insertion into
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
* (for longer indels).
*
* @return reference allele, forward strand
*/
public String getRefBasesFWD() {
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
return (getAllelesFWD().get(0).equals(refBases)) ? getAllelesFWD().get(0) : getAllelesFWD().get(1);
//if ( onFwdStrand() )
// return refBases;
//else
// return SequenceUtil.reverseComplement(refBases);
}
/**
* Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* @return reference base on the forward strand
*/
public char getRefSnpFWD() throws IllegalStateException {
//System.out.printf("refbases is %s but %s%n", refBases, toString());
if (isIndel()) throw new IllegalStateException("Variant is not a SNP");
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
List<String> alleles = getAllelesFWD();
String val = (alleles.get(0).equals(refBases) ? alleles.get(0) : alleles.get(1));
return val.charAt(0);
// if ( onFwdStrand() ) return refBases.charAt(0);
// else return SequenceUtil.reverseComplement(refBases).charAt(0);
}
public List<String> getAllelesFWD() {
List<String> alleles = null;
if (onFwdStrand())
alleles = Arrays.asList(observed.split("/"));
else
alleles = Arrays.asList(SequenceUtil.reverseComplement(observed).split("/"));
//System.out.printf("getAlleles %s on %s %b => %s %n", observed, strand, onFwdStrand(), Utils.join("/", alleles));
return alleles;
}
public String getAllelesFWDString() {
return Utils.join("", getAllelesFWD());
}
/** /**
* get the frequency of this variant * get the frequency of this variant
* *
@ -145,7 +132,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/ */
@Override @Override
public double getNonRefAlleleFrequency() { public double getNonRefAlleleFrequency() {
return 0; //To change body of implemented methods use File | Settings | File Templates. return 0; // dbSNP doesn't know the allele frequency
} }
/** @return the VARIANT_TYPE of the current variant */ /** @return the VARIANT_TYPE of the current variant */
@ -170,28 +157,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
return varType.contains("deletion"); return varType.contains("deletion");
} }
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return getAllelesFWDString();
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
public boolean isIndel() { public boolean isIndel() {
return isInsertion() || isDeletion() || varType.contains("in-del"); return isInsertion() || isDeletion() || varType.contains("in-del");
} }
@ -204,11 +169,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/ */
@Override @Override
public char getAlternativeBaseForSNP() { public char getAlternativeBaseForSNP() {
return getAltSnpFWD(); /* if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc);
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP"); if (!isBiallelic()) throw new StingException("We're not biallelic; at position " + this.loc);
if (getAlternateBases().charAt(0) == this.getReference()) List<String> ret = this.getAlternateAlleleList();
return getAlternateBases().charAt(1); if (ret.size() == 1 && ret.get(0).length() == 1)
return getAlternateBases().charAt(0); */ return ret.get(0).charAt(0);
throw new StingException("getAlternativeBaseForSNP failed for DbSNP rod " + this.loc);
} }
/** /**
@ -218,12 +184,14 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/ */
@Override @Override
public char getReferenceForSNP() { public char getReferenceForSNP() {
return 0; //To change body of implemented methods use File | Settings | File Templates. if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc);
if (refBases.length() != 1) throw new StingException("The reference base in DbSNP must be zero, at position " + this.loc + " was " + refBases);
return refBases.charAt(0); // we know it's length 1, this is ok
} }
public boolean isReference() { public boolean isReference() {
return false; return false; // snp locations are never "reference", there's always a variant
} // snp locations are never "reference", there's always a variant }
public boolean isHapmap() { public boolean isHapmap() {
return validationStatus.contains("by-hapmap"); return validationStatus.contains("by-hapmap");
@ -250,7 +218,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
} }
public String toMediumString() { public String toMediumString() {
String s = String.format("%s:%s:%s", getLocation().toString(), name, getAllelesFWDString()); String s = String.format("%s:%s:%s", getLocation().toString(), name, Utils.join("",this.getAlleleList()));
if (isSNP()) s += ":SNP"; if (isSNP()) s += ":SNP";
if (isIndel()) s += ":Indel"; if (isIndel()) s += ":Indel";
if (isHapmap()) s += ":Hapmap"; if (isHapmap()) s += ":Hapmap";
@ -300,49 +268,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
} }
} }
/**
 * Picks the non-reference allele out of the first two alleles from getAllelesFWD().
 *
 * @return the second allele when the first equals refBases, otherwise the first allele
 */
public String getAltBasesFWD() {
    final List<String> fwd = getAllelesFWD();
    final String first = fwd.get(0);
    if (first.equals(refBases)) {
        return fwd.get(1);
    }
    return first;
}
/**
 * Gets the first character of the alternate bases of a SNP.
 *
 * @return the first character of getAltBasesFWD()
 * @throws IllegalStateException if this record is not a SNP
 */
public char getAltSnpFWD() throws IllegalStateException {
    if (isSNP()) {
        return getAltBasesFWD().charAt(0);
    }
    throw new IllegalStateException("I'm not a SNP");
}
/** @return always Double.MAX_VALUE; this is still a placeholder implementation */
public double getConsensusConfidence() {
    final double placeholder = Double.MAX_VALUE;
    return placeholder;
}
/**
 * Gets the genotype as a one-element list.
 *
 * @return a list whose only element is the alleles from getAllelesFWD() joined with no separator
 */
public List<String> getGenotype() throws IllegalStateException {
    final String joined = Utils.join("", getAllelesFWD());
    return Arrays.asList(joined);
}
/** @return always -1; the minor allele frequency is not actually computed yet (fixme) */
public double getMAF() {
    final double notComputed = -1;
    return notComputed;
}
public double getHeterozygosity() { public double getHeterozygosity() {
return avHet; return avHet;
} }
public int getPloidy() throws IllegalStateException { public int getPloidy() throws IllegalStateException {
// TODO Auto-generated method stub return 2; // our DbSNP assumes a diploid human
return 0;
}
public double getVariationConfidence() {
// TODO Auto-generated method stub
return Double.MAX_VALUE;
}
public boolean isGenotype() {
// TODO Auto-generated method stub
return false;
} }
public boolean isBiallelic() { public boolean isBiallelic() {
@ -350,10 +281,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
return observed.indexOf('/') == observed.lastIndexOf('/'); return observed.indexOf('/') == observed.lastIndexOf('/');
} }
/** @return the inclusive span from loc.getStart() to loc.getStop(), narrowed to int */
public int length() {
    return (int) (1 + loc.getStop() - loc.getStart());
}
/** /**
* get the genotype * get the genotype
* *
@ -361,7 +288,10 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/ */
@Override @Override
public org.broadinstitute.sting.utils.genotype.Genotype getCalledGenotype() { public org.broadinstitute.sting.utils.genotype.Genotype getCalledGenotype() {
return new BasicGenotype(this.getLocation(), this.getAltBasesFWD(), this.getRefSnpFWD(), this.getConsensusConfidence()); return new BasicGenotype(getLocation(),
BasicGenotype.alleleListToString(getAlleleList()),
Utils.stringToChar(getReference()),
getNegLog10PError());
} }
/** /**
@ -371,11 +301,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/ */
@Override @Override
public List<org.broadinstitute.sting.utils.genotype.Genotype> getGenotypes() { public List<org.broadinstitute.sting.utils.genotype.Genotype> getGenotypes() {
List<org.broadinstitute.sting.utils.genotype.Genotype> list = new ArrayList<org.broadinstitute.sting.utils.genotype.Genotype>(); ArrayList<Genotype> list = new ArrayList<Genotype>();
list.add(new BasicGenotype(this.getLocation(), this.getAltBasesFWD(), this.getRefSnpFWD(), this.getConsensusConfidence())); list.add(getCalledGenotype());
return list; return list;
} }
/** /**
* do we have the specified genotype? not all backedByGenotypes * do we have the specified genotype? not all backedByGenotypes
* have all the genotype data. * have all the genotype data.
@ -386,21 +317,21 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/ */
@Override @Override
public boolean hasGenotype(DiploidGenotype x) { public boolean hasGenotype(DiploidGenotype x) {
return (!x.toString().equals(this.getAltBasesFWD())) ? false : true; return (!x.toString().equals(BasicGenotype.alleleListToString(getAlleleList()))) ? false : true;
} }
public static rodDbSNP getFirstRealSNP(RODRecordList<ReferenceOrderedDatum> dbsnpList) { public static rodDbSNP getFirstRealSNP(RODRecordList<ReferenceOrderedDatum> dbsnpList) {
if ( dbsnpList == null ) if (dbsnpList == null)
return null; return null;
rodDbSNP dbsnp = null; rodDbSNP dbsnp = null;
for ( ReferenceOrderedDatum d : dbsnpList ) { for (ReferenceOrderedDatum d : dbsnpList) {
if ( ((rodDbSNP)d).isSNP() ) { if (((rodDbSNP) d).isSNP()) {
dbsnp = (rodDbSNP)d; dbsnp = (rodDbSNP) d;
break; break;
} }
} }
return dbsnp; return dbsnp;
} }
} }

View File

@ -49,19 +49,20 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
continue; continue;
// if we have multiple variants at a locus, just take the first damn one we see for now // if we have multiple variants at a locus, just take the first damn one we see for now
Variation variant = (Variation) rod; Variation variant = (Variation) rod;
if (variant.getAlleleList().size() != 2) System.err.println("Not two " + Utils.join("-",variant.getAlleleList()));
if (!rod.getName().startsWith("snpmask") && variant.isDeletion()) { if (!rod.getName().startsWith("snpmask") && variant.isDeletion()) {
deletionBasesRemaining = variant.getAlternateBases().length(); deletionBasesRemaining = variant.getAlleleList().get(0).length();
basesSeen++; basesSeen++;
if (indelsWriter != null) if (indelsWriter != null)
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length())); indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlleleList().get(0).length()));
// delete the next n bases, not this one // delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase); return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if (!rod.getName().startsWith("snpmask") && variant.isInsertion()) { } else if (!rod.getName().startsWith("snpmask") && variant.isInsertion()) {
basesSeen++; basesSeen++;
if (indelsWriter != null) if (indelsWriter != null)
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length())); indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlleleList().get(0).length()));
basesSeen += variant.getAlternateBases().length(); basesSeen += variant.getAlleleList().get(0).length();
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(variant.getAlternateBases())); return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(Utils.join("",variant.getAlleleList())));
} else if (variant.isSNP()) { } else if (variant.isSNP()) {
basesSeen++; basesSeen++;
return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : String.valueOf(variant.getAlternativeBaseForSNP()))); return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : String.valueOf(variant.getAlternativeBaseForSNP())));

View File

@ -70,9 +70,9 @@ public class PickSequenomProbes extends RefWalker<String, String> {
if ( variant.isSNP() ) if ( variant.isSNP() )
assay_sequence = leading_bases + "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" + trailing_bases; assay_sequence = leading_bases + "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" + trailing_bases;
else if ( variant.isInsertion() ) else if ( variant.isInsertion() )
assay_sequence = leading_bases + refBase + "[-/" + variant.getAlternateBases() + "]" + trailing_bases; assay_sequence = leading_bases + refBase + "[-/" + Utils.join("",variant.getAlleleList()) + "]" + trailing_bases;
else if ( variant.isDeletion() ) else if ( variant.isDeletion() )
assay_sequence = leading_bases + refBase + "[" + variant.getAlternateBases() + "/-]" + trailing_bases.substring(variant.getAlternateBases().length()); assay_sequence = leading_bases + refBase + "[" + Utils.join("",variant.getAlleleList()) + "/-]" + trailing_bases.substring(variant.getAlleleList().size());
else else
return ""; return "";

View File

@ -1,22 +1,22 @@
package org.broadinstitute.sting.playground.gatk.walkers; package org.broadinstitute.sting.playground.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.Formatter;
import static java.lang.Math.log10;
import edu.mit.broad.picard.genotype.DiploidGenotype; import edu.mit.broad.picard.genotype.DiploidGenotype;
import net.sf.picard.PicardException; import net.sf.picard.PicardException;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import static java.lang.Math.log10;
import java.util.Arrays;
import java.util.Formatter;
import java.util.List;
public class PopPriorWalker extends LocusWalker<Integer, Integer> { public class PopPriorWalker extends LocusWalker<Integer, Integer> {
@ -174,7 +174,7 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
priors = getKnownSiteKnownFreqPriors(((byte)(upRef & 0xff)), knownAlleles, hapmap.getVarAlleleFreq()); priors = getKnownSiteKnownFreqPriors(((byte)(upRef & 0xff)), knownAlleles, hapmap.getVarAlleleFreq());
} else if (dbsnpInfo != null && dbsnpInfo.isSNP()) { } else if (dbsnpInfo != null && dbsnpInfo.isSNP()) {
List<String> knownAlleles = dbsnpInfo.getAllelesFWD(); List<String> knownAlleles = Arrays.asList(Utils.join("",dbsnpInfo.getAlleleList()));
priorType = "DBSNP"; priorType = "DBSNP";
rodString = "[DBSNP: " + dbsnpInfo.toMediumString() + "]"; rodString = "[DBSNP: " + dbsnpInfo.toMediumString() + "]";

View File

@ -68,7 +68,8 @@ public class IndelSubsets implements ConcordanceType {
// only deal with a valid indel // only deal with a valid indel
Variation indel = ( indel1 != null ? indel1 : indel2 ); Variation indel = ( indel1 != null ? indel1 : indel2 );
int size = ( indel.getAlternateBases().length() <= sizeCutoff ? 0 : 1 ); // we only deal with the first allele
int size = ( indel.getAlternateAlleleList().get(0).length() <= sizeCutoff ? 0 : 1 );
int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 ); int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 );
writers[set1][set2][size][homopol].println(indel.toString()); writers[set1][set2][size][homopol].println(indel.toString());
@ -80,7 +81,7 @@ public class IndelSubsets implements ConcordanceType {
GenomeLoc locus = ref.getLocus(); GenomeLoc locus = ref.getLocus();
int refBasePos = (int)(locus.getStart() - window.getStart()); int refBasePos = (int)(locus.getStart() - window.getStart());
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAlternateBases().charAt(0); char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAlternateAlleleList().get(0).charAt(0);
int leftRun = 0; int leftRun = 0;
for ( int i = refBasePos; i >= 0; i--) { for ( int i = refBasePos; i >= 0; i--) {
if ( bases[i] != indelBase ) if ( bases[i] != indelBase )
@ -88,9 +89,9 @@ public class IndelSubsets implements ConcordanceType {
leftRun++; leftRun++;
} }
indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateBases().length(),bases.length-1)] : indel.getAlternateBases().charAt(indel.getAlternateBases().length()-1); indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateAlleleList().get(0).length(),bases.length-1)] : indel.getAlternateAlleleList().get(0).charAt(indel.getAlternateAlleleList().get(0).length()-1);
int rightRun = 0; int rightRun = 0;
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateBases().length() : 1); i < bases.length; i++) { for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateAlleleList().get(0).length() : 1); i < bases.length; i++) {
if ( bases[i] != indelBase ) if ( bases[i] != indelBase )
break; break;
rightRun++; rightRun++;

View File

@ -12,10 +12,9 @@ import java.util.List;
* SOFTWARE COPYRIGHT NOTICE AGREEMENT * SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the * This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* * <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither * This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/ */
public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
long insertions = 0; long insertions = 0;
@ -30,19 +29,20 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
} }
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if ( eval != null && eval.isInsertion() ) { if (eval != null && eval.isInsertion()) {
if ( eval.isInsertion() ) if (eval.isInsertion())
insertions++; insertions++;
else if ( eval.isDeletion() ) else if (eval.isDeletion())
deletions++; deletions++;
else else
throw new RuntimeException("Variation is indel, but isn't insertion or deletion!"); throw new RuntimeException("Variation is indel, but isn't insertion or deletion!");
if ( eval.getAlternateBases().length() < 100 ) { for (String allele : eval.getAlleleList())
sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBases().length()]++; if (allele.length() < 100) {
if ( eval.getAlternateBases().length() > maxSize ) sizes[eval.isDeletion() ? 0 : 1][allele.length()]++;
maxSize = eval.getAlternateBases().length(); if (allele.length() > maxSize)
} maxSize = allele.length();
}
} }
return null; return null;
@ -56,7 +56,7 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
s.add("Size Distribution"); s.add("Size Distribution");
s.add("size\tdeletions\tinsertions"); s.add("size\tdeletions\tinsertions");
for ( int i = 1; i <= maxSize; i++ ) for (int i = 1; i <= maxSize; i++)
s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i])); s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i]));
return s; return s;

View File

@ -201,7 +201,7 @@ class PooledConcordanceTable {
public boolean pooledCallIsRef(Variation eval, char ref) { public boolean pooledCallIsRef(Variation eval, char ref) {
// code broken out for easy alteration when we start using pool-specific variations // code broken out for easy alteration when we start using pool-specific variations
return eval.getAlternateBases().equalsIgnoreCase((Utils.dupString(ref,2))); return Utils.join("",eval.getAlleleList()).equalsIgnoreCase((Utils.dupString(ref,2)));
} }
public int calculateNumFrequencyIndeces(int poolSize) { public int calculateNumFrequencyIndeces(int poolSize) {
@ -225,7 +225,7 @@ class PooledConcordanceTable {
for ( Variation eval : evals ) { for ( Variation eval : evals ) {
if ( mismatchingCalls(firstEval, eval, ref) ) { if ( mismatchingCalls(firstEval, eval, ref) ) {
// todo -- make this not a StingException but go to the log // todo -- make this not a StingException but go to the log
throw new StingException("Tri-Allelic Position "+eval.getAlternateBases()+"/"+firstEval.getAlternateBases() + " Ref: "+ ref + " not supported"); throw new StingException("Tri-Allelic Position "+Utils.join("",eval.getAlleleList())+"/"+Utils.join("",firstEval.getAlleleList()) + " Ref: "+ ref + " not supported");
} else { } else {
alternateFrequency += calledVariantFrequency(eval,ref); alternateFrequency += calledVariantFrequency(eval,ref);
} }
@ -249,10 +249,10 @@ class PooledConcordanceTable {
public boolean mismatchingCalls(Variation eval, Variation chip, char ref) { public boolean mismatchingCalls(Variation eval, Variation chip, char ref) {
// eval and chip guaranteed to be non-null // eval and chip guaranteed to be non-null
char chipF = chip.getAlternateBases().charAt(0); char chipF = Utils.stringToChar(chip.getAlleleList().get(0));
char chipS = chip.getAlternateBases().charAt(1); char chipS = Utils.stringToChar(chip.getAlleleList().get(1));
char evalF = chip.getAlternateBases().charAt(0); char evalF = Utils.stringToChar(eval.getAlleleList().get(0));
char evalS = chip.getAlternateBases().charAt(1); char evalS = Utils.stringToChar(eval.getAlleleList().get(1));
boolean mismatch; boolean mismatch;
if (chipF == ref) { if (chipF == ref) {
if ( chipS == ref ) { if ( chipS == ref ) {
@ -277,7 +277,7 @@ class PooledConcordanceTable {
public double calledVariantFrequency( Variation var, char ref ) { public double calledVariantFrequency( Variation var, char ref ) {
// code broken out for easy alteration when we start using pool-specific variations // code broken out for easy alteration when we start using pool-specific variations
String varStr = var.getAlternateBases(); String varStr = Utils.join("",var.getAlleleList());
double freq; double freq;
if ( varStr.charAt(0) != ref && varStr.charAt(1) != ref ) { if ( varStr.charAt(0) != ref && varStr.charAt(1) != ref ) {
freq = (double) 2; freq = (double) 2;

View File

@ -1,7 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.BrokenRODSimulator;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList; import java.util.ArrayList;
@ -90,9 +92,9 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
} }
public boolean discordantP(Variation dbSNP, Variation eval) { public boolean discordantP(Variation dbSNP, Variation eval) {
if (eval != null) { if (eval != null) {
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference().charAt(0); char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : Utils.stringToChar(eval.getReference());
if (dbSNP != null && dbSNP.isSNP()) if (dbSNP != null && dbSNP.isSNP())
return !dbSNP.getAlternateBases().contains(String.valueOf(alt)); return !dbSNP.getAlleleList().contains(String.valueOf(alt));
} }
return false; return false;
} }
@ -125,7 +127,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) { if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) {
return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval); return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval);
} else if (dbsnp.isIndel() && eval.isSNP()) { } else if (dbsnp.isIndel() && eval.isSNP()) {
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval); return String.format("SNP-at-indel DBSNP=%s %s", Utils.join("",dbsnp.getAlleleList()), eval);
} else { } else {
return null; return null;
} }

View File

@ -81,7 +81,7 @@ public class GenotypeUtils {
if ( var instanceof Genotype ) if ( var instanceof Genotype )
return ((Genotype)var).isHet(); return ((Genotype)var).isHet();
String genotype = var.getAlternateBases(); String genotype = Utils.join("",var.getAlleleList());
if ( genotype.length() < 1 ) if ( genotype.length() < 1 )
return false; return false;

View File

@ -2,16 +2,11 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.*; import net.sf.samtools.*;
import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.StringUtil;
import net.sf.picard.reference.ReferenceSequenceFile;
import java.util.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.BufferedReader;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import java.io.File;
import java.util.*;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
* User: depristo * User: depristo
@ -20,9 +15,7 @@ import org.apache.log4j.Logger;
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
public class Utils { public class Utils {
/** /** our log, which we want to capture anything from this class */
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(Utils.class); private static Logger logger = Logger.getLogger(Utils.class);
public static void warnUser(final String msg) { public static void warnUser(final String msg) {
@ -45,25 +38,28 @@ public class Utils {
/** /**
* Compares two objects, either of which might be null. * Compares two objects, either of which might be null.
*
* @param lhs One object to compare. * @param lhs One object to compare.
* @param rhs The other object to compare. * @param rhs The other object to compare.
*
* @return True if the two objects are equal, false otherwise. * @return True if the two objects are equal, false otherwise.
*/ */
public static boolean equals(Object lhs, Object rhs) { public static boolean equals(Object lhs, Object rhs) {
if( lhs == null && rhs == null ) return true; if (lhs == null && rhs == null) return true;
else if( lhs == null ) return false; else if (lhs == null) return false;
else return lhs.equals(rhs); else return lhs.equals(rhs);
} }
public static <T> List<T> cons(final T elt, final List<T> l) { public static <T> List<T> cons(final T elt, final List<T> l) {
List<T> l2 = new ArrayList<T>(); List<T> l2 = new ArrayList<T>();
l2.add(elt); l2.add(elt);
if ( l != null ) l2.addAll(l); if (l != null) l2.addAll(l);
return l2; return l2;
} }
/** /**
* pretty print the warning message supplied * pretty print the warning message supplied
*
* @param message the message * @param message the message
*/ */
private static void prettyPrintWarningMessage(String message) { private static void prettyPrintWarningMessage(String message) {
@ -71,13 +67,13 @@ public class Utils {
while (builder.length() > 70) { while (builder.length() > 70) {
int space = builder.lastIndexOf(" ", 70); int space = builder.lastIndexOf(" ", 70);
if (space <= 0) space = 70; if (space <= 0) space = 70;
logger.warn(String.format("* %s", builder.substring(0,space))); logger.warn(String.format("* %s", builder.substring(0, space)));
builder.delete(0,space + 1); builder.delete(0, space + 1);
} }
logger.warn(String.format("* %s", builder)); logger.warn(String.format("* %s", builder));
} }
public static SAMFileHeader copySAMFileHeader( SAMFileHeader toCopy ) { public static SAMFileHeader copySAMFileHeader(SAMFileHeader toCopy) {
SAMFileHeader copy = new SAMFileHeader(); SAMFileHeader copy = new SAMFileHeader();
copy.setSortOrder(toCopy.getSortOrder()); copy.setSortOrder(toCopy.getSortOrder());
@ -86,9 +82,9 @@ public class Utils {
copy.setReadGroups(toCopy.getReadGroups()); copy.setReadGroups(toCopy.getReadGroups());
copy.setSequenceDictionary(toCopy.getSequenceDictionary()); copy.setSequenceDictionary(toCopy.getSequenceDictionary());
for ( Map.Entry<String, Object> e : toCopy.getAttributes()) for (Map.Entry<String, Object> e : toCopy.getAttributes())
copy.setAttribute(e.getKey(), e.getValue()); copy.setAttribute(e.getKey(), e.getValue());
return copy; return copy;
} }
@ -104,6 +100,7 @@ public class Utils {
* *
* @param pred filtering condition ( objects, for which pred.apply() is true pass the filter ) * @param pred filtering condition ( objects, for which pred.apply() is true pass the filter )
* @param c collection to filter (will not be modified) * @param c collection to filter (will not be modified)
*
* @return new list built from elements of <c> passing the filter * @return new list built from elements of <c> passing the filter
* @see #filterInPlace(Predicate pred, Collection c) * @see #filterInPlace(Predicate pred, Collection c)
*/ */
@ -135,6 +132,7 @@ public class Utils {
* *
* @param pred filtering condition (only elements, for which pred.apply() is true will be kept in the collection) * @param pred filtering condition (only elements, for which pred.apply() is true will be kept in the collection)
* @param c collection to filter (will be modified - should be mutable and should implement remove() ) * @param c collection to filter (will be modified - should be mutable and should implement remove() )
*
* @return reference to the same (modified) collection <c> * @return reference to the same (modified) collection <c>
* @see #filter(Predicate pred, Collection c) * @see #filter(Predicate pred, Collection c)
*/ */
@ -175,12 +173,12 @@ public class Utils {
public static ArrayList<Byte> subseq(char[] fullArray) { public static ArrayList<Byte> subseq(char[] fullArray) {
byte[] fullByteArray = new byte[fullArray.length]; byte[] fullByteArray = new byte[fullArray.length];
StringUtil.charsToBytes(fullArray,0,fullArray.length,fullByteArray,0); StringUtil.charsToBytes(fullArray, 0, fullArray.length, fullByteArray, 0);
return subseq(fullByteArray); return subseq(fullByteArray);
} }
public static ArrayList<Byte> subseq(byte[] fullArray) { public static ArrayList<Byte> subseq(byte[] fullArray) {
return subseq(fullArray, 0, fullArray.length-1); return subseq(fullArray, 0, fullArray.length - 1);
} }
public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) { public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) {
@ -204,9 +202,9 @@ public class Utils {
public static boolean is454Read(SAMRecord read) { public static boolean is454Read(SAMRecord read) {
SAMReadGroupRecord readGroup = read.getReadGroup(); SAMReadGroupRecord readGroup = read.getReadGroup();
if ( readGroup != null ) { if (readGroup != null) {
Object readPlatformAttr = readGroup.getAttribute("PL"); Object readPlatformAttr = readGroup.getAttribute("PL");
if ( readPlatformAttr != null ) if (readPlatformAttr != null)
return readPlatformAttr.toString().toUpperCase().contains("454"); return readPlatformAttr.toString().toUpperCase().contains("454");
} }
return false; return false;
@ -248,7 +246,7 @@ public class Utils {
return ""; return "";
} }
StringBuilder ret = new StringBuilder(strings[start]); StringBuilder ret = new StringBuilder(strings[start]);
for (int i = start+1; i < end; ++i) { for (int i = start + 1; i < end; ++i) {
ret.append(separator); ret.append(separator);
ret.append(strings[i]); ret.append(strings[i]);
} }
@ -332,13 +330,13 @@ public class Utils {
public static Integer[] SortPermutation(final double[] A) { public static Integer[] SortPermutation(final double[] A) {
class comparator implements Comparator<Integer> { class comparator implements Comparator<Integer> {
public int compare(Integer a, Integer b) { public int compare(Integer a, Integer b) {
if (A[a.intValue()] < A[ b.intValue() ]) { if (A[a.intValue()] < A[b.intValue()]) {
return -1; return -1;
} }
if (A[ a.intValue() ] == A[ b.intValue() ]) { if (A[a.intValue()] == A[b.intValue()]) {
return 0; return 0;
} }
if (A[ a.intValue() ] > A[ b.intValue() ]) { if (A[a.intValue()] > A[b.intValue()]) {
return 1; return 1;
} }
return 0; return 0;
@ -401,8 +399,7 @@ public class Utils {
return output; return output;
} }
public static <T> List<T> PermuteList(List<T> list, Integer[] permutation) public static <T> List<T> PermuteList(List<T> list, Integer[] permutation) {
{
List<T> output = new ArrayList<T>(); List<T> output = new ArrayList<T>();
for (int i = 0; i < permutation.length; i++) { for (int i = 0; i < permutation.length; i++) {
output.add(list.get(permutation[i])); output.add(list.get(permutation[i]));
@ -412,47 +409,59 @@ public class Utils {
/** Draw N random elements from list. */ /** Draw N random elements from list. */
public static <T> List<T> RandomSubset(List<T> list, int N) public static <T> List<T> RandomSubset(List<T> list, int N) {
{ if (list.size() <= N) {
if (list.size() <= N) { return list; } return list;
}
java.util.Random random = new java.util.Random(); java.util.Random random = new java.util.Random();
int idx[] = new int[list.size()]; int idx[] = new int[list.size()];
for (int i = 0; i < list.size(); i++) { idx[i] = random.nextInt(); } for (int i = 0; i < list.size(); i++) {
idx[i] = random.nextInt();
}
Integer[] perm = SortPermutation(idx); Integer[] perm = SortPermutation(idx);
List<T> ans = new ArrayList<T>(); List<T> ans = new ArrayList<T>();
for (int i = 0; i < N; i++) { ans.add(list.get(perm[i])); } for (int i = 0; i < N; i++) {
ans.add(list.get(perm[i]));
}
return ans; return ans;
} }
// lifted from the internet // lifted from the internet
// http://www.cs.princeton.edu/introcs/91float/Gamma.java.html // http://www.cs.princeton.edu/introcs/91float/Gamma.java.html
public static double logGamma(double x) public static double logGamma(double x) {
{ double tmp = (x - 0.5) * Math.log(x + 4.5) - (x + 4.5);
double tmp = (x - 0.5) * Math.log(x + 4.5) - (x + 4.5); double ser = 1.0 + 76.18009173 / (x + 0) - 86.50532033 / (x + 1)
double ser = 1.0 + 76.18009173 / (x + 0) - 86.50532033 / (x + 1) + 24.01409822 / (x + 2) - 1.231739516 / (x + 3)
+ 24.01409822 / (x + 2) - 1.231739516 / (x + 3) + 0.00120858003 / (x + 4) - 0.00000536382 / (x + 5);
+ 0.00120858003 / (x + 4) - 0.00000536382 / (x + 5); return tmp + Math.log(ser * Math.sqrt(2 * Math.PI));
return tmp + Math.log(ser * Math.sqrt(2 * Math.PI));
} }
public static double percentage(double x, double base) { return (base> 0 ? (x/base)*100.0 : 0); } public static double percentage(double x, double base) {
public static double percentage(int x, int base) { return (base> 0 ? ((double)x/(double)base)*100.0 : 0); } return (base > 0 ? (x / base) * 100.0 : 0);
public static double percentage(long x, long base) { return (base> 0 ? ((double)x/(double)base)*100.0 : 0); } }
public static String dupString( char c, int nCopies ) { public static double percentage(int x, int base) {
return (base > 0 ? ((double) x / (double) base) * 100.0 : 0);
}
public static double percentage(long x, long base) {
return (base > 0 ? ((double) x / (double) base) * 100.0 : 0);
}
public static String dupString(char c, int nCopies) {
char[] chars = new char[nCopies]; char[] chars = new char[nCopies];
Arrays.fill(chars,c); Arrays.fill(chars, c);
return new String(chars); return new String(chars);
} }
public static int countOccurrences(char c, String s) { public static int countOccurrences(char c, String s) {
int count = 0; int count = 0;
for ( int i = 0; i < s.length(); i++ ) { for (int i = 0; i < s.length(); i++) {
count += s.charAt(i) == c ? 1 : 0; count += s.charAt(i) == c ? 1 : 0;
} }
return count; return count;
@ -460,17 +469,17 @@ public class Utils {
public static <T> int countOccurrences(T x, List<T> l) { public static <T> int countOccurrences(T x, List<T> l) {
int count = 0; int count = 0;
for ( T y : l ) { for (T y : l) {
if ( x.equals(y) ) count++; if (x.equals(y)) count++;
} }
return count; return count;
} }
public static byte listMaxByte(List<Byte> quals) { public static byte listMaxByte(List<Byte> quals) {
if ( quals.size() == 0 ) return 0; if (quals.size() == 0) return 0;
byte m = quals.get(0); byte m = quals.get(0);
for ( byte b : quals ) { for (byte b : quals) {
m = b > m ? b : m; m = b > m ? b : m;
} }
return m; return m;
@ -479,67 +488,84 @@ public class Utils {
/** Returns indices of all occurrences of the specified symbol in the string */ /** Returns indices of all occurrences of the specified symbol in the string */
public static int[] indexOfAll(String s, int ch) { public static int[] indexOfAll(String s, int ch) {
int[] pos = new int[64]; int[] pos = new int[64];
int z = 0; int z = 0;
for ( int i = 0 ; i < s.length() ; i++ ) { for (int i = 0; i < s.length(); i++) {
if ( s.charAt(i) == ch ) pos[z++] = i; if (s.charAt(i) == ch) pos[z++] = i;
} }
return reallocate(pos,z); return reallocate(pos, z);
} }
/** Returns new (reallocated) integer array of the specified size, with content /**
* Returns new (reallocated) integer array of the specified size, with content
* of the original array <code>orig</code> copied into it. If <code>newSize</code> is * of the original array <code>orig</code> copied into it. If <code>newSize</code> is
* less than the size of the original array, only first <code>newSize</code> elements will be copied. * less than the size of the original array, only first <code>newSize</code> elements will be copied.
* If new size is greater than the size of the original array, the content of the original array will be padded * If new size is greater than the size of the original array, the content of the original array will be padded
* with zeros up to the new size. Finally, if new size is the same as original size, no memory reallocation * with zeros up to the new size. Finally, if new size is the same as original size, no memory reallocation
* will be performed and the original array will be returned instead. * will be performed and the original array will be returned instead.
*
* @param orig * @param orig
* @param newSize * @param newSize
*
* @return * @return
*/ */
public static int[] reallocate(int[] orig, int newSize) { public static int[] reallocate(int[] orig, int newSize) {
if ( orig.length == newSize ) return orig; if (orig.length == newSize) return orig;
int[] new_array = new int[newSize]; int[] new_array = new int[newSize];
int L = ( newSize > orig.length ? orig.length : newSize ); int L = (newSize > orig.length ? orig.length : newSize);
for ( int i = 0 ; i < L ; i++ ) new_array[i] = orig[i]; for (int i = 0; i < L; i++) new_array[i] = orig[i];
return new_array; return new_array;
} }
/* TEST ME /* TEST ME
public static void main(String[] argv) { public static void main(String[] argv) {
List<Integer> l1 = new LinkedList<Integer>(); List<Integer> l1 = new LinkedList<Integer>();
List<Integer> l2 = new ArrayList<Integer>(); List<Integer> l2 = new ArrayList<Integer>();
l1.add(1); l1.add(1);
l1.add(5); l1.add(5);
l1.add(3); l1.add(3);
l1.add(10); l1.add(10);
l1.add(4); l1.add(4);
l1.add(2); l1.add(2);
l2.add(1); l2.add(1);
l2.add(5); l2.add(5);
l2.add(3); l2.add(3);
l2.add(10); l2.add(10);
l2.add(4); l2.add(4);
l2.add(2); l2.add(2);
Predicate<Integer> p = new Predicate<Integer>() { Predicate<Integer> p = new Predicate<Integer>() {
public boolean apply(Integer i) { public boolean apply(Integer i) {
return i > 2; return i > 2;
} }
}; };
filterInPlace(p, l1); filterInPlace(p, l1);
filterInPlace(p, l2); filterInPlace(p, l2);
for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i)); for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i));
System.out.println(); System.out.println();
for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i)); for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i));
System.out.println(); System.out.println();
}
*/
/**
* a helper method. Turns a single character string into a char.
*
* @param str the string
*
* @return a char
*/
public static char stringToChar(String str) {
if (str.length() != 1) throw new IllegalArgumentException("String length must be one");
return str.charAt(0);
} }
*/
} }

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
/** /**
* @author aaron * @author aaron
@ -153,4 +155,17 @@ public class BasicGenotype implements Genotype {
if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant"); if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant");
return new BasicVariation(this.getBases(), String.valueOf(mRef), this.getBases().length(), mLocation, mNegLog10PError); return new BasicVariation(this.getBases(), String.valueOf(mRef), this.getBases().length(), mLocation, mNegLog10PError);
} }
/**
 * Joins a list of alleles into a single genotype string.
 * The alleles are concatenated in list order with no separator.
 *
 * @param alleles the alleles to join
 * @return the concatenation of every allele in the list (empty for an empty list)
 */
public static String alleleListToString(List<String> alleles) {
    final StringBuilder genotype = new StringBuilder();
    for (final String base : alleles) {
        genotype.append(base);
    }
    return genotype.toString();
}
} }

View File

@ -1,9 +1,11 @@
package org.broadinstitute.sting.utils.genotype; package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/** /**
* User: aaron * User: aaron
@ -40,6 +42,7 @@ public class BasicVariation implements Variation {
public BasicVariation(String bases, String reference, int length, GenomeLoc location, double confidence) { public BasicVariation(String bases, String reference, int length, GenomeLoc location, double confidence) {
mBases = bases; mBases = bases;
mRef = reference; mRef = reference;
if (mRef.length() != 1) throw new StingException("The reference must be a single base");
mLength = length; mLength = length;
mLocation = location; mLocation = location;
mConfidence = confidence; mConfidence = confidence;
@ -82,23 +85,6 @@ public class BasicVariation implements Variation {
return (mLength < 0); return (mLength < 0);
} }
@Override
public String getAlternateBases() {
return mBases;
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
@Override @Override
public GenomeLoc getLocation() { public GenomeLoc getLocation() {
return mLocation; return mLocation;
@ -112,7 +98,7 @@ public class BasicVariation implements Variation {
/** are we bi-allelic? */ /** are we bi-allelic? */
@Override @Override
public boolean isBiallelic() { public boolean isBiallelic() {
return true; return (getAlternateAlleleList().size() == 1);
} }
@Override @Override
@ -120,6 +106,40 @@ public class BasicVariation implements Variation {
return mConfidence; return mConfidence;
} }
/**
 * gets the alternate alleles. This method returns every distinct base of this variation that
 * differs from the reference base, NOT including the reference base itself. Each allele is
 * listed once, in order of first appearance in the bases; the order carries no frequency
 * information (i.e. the first alternate allele is not always going to be the allele with the
 * greatest frequency).
 *
 * @return the list of distinct alternate alleles, each a single-character string; empty when
 *         every base matches the reference
 */
@Override
public List<String> getAlternateAlleleList() {
    List<String> list = new ArrayList<String>();
    for (char c : this.mBases.toCharArray()) {
        if (c == Utils.stringToChar(mRef)) continue;
        String allele = String.valueOf(c);
        // bases such as "AA" carry one alternate allele, not two: listing it twice would make
        // isBiallelic() (which checks for a list size of one) report false for such a site
        if (!list.contains(allele)) list.add(allele);
    }
    return list;
}
/**
 * gets the alleles. This method returns every distinct allele present in this variation's
 * bases. When the reference base is one of those bases it is always the first entry; it is
 * followed by each distinct non-reference base, listed once in order of first appearance.
 * If the reference base is not among the bases it does not appear in the list.
 *
 * @return the allele list (reference first when present, then the distinct alternate alleles)
 */
@Override
public List<String> getAlleleList() {
    List<String> list = new ArrayList<String>();
    if (this.mBases.contains(mRef)) list.add(mRef);
    for (char c : this.mBases.toCharArray()) {
        if (c == Utils.stringToChar(mRef)) continue;
        String allele = String.valueOf(c);
        // the reference is added at most once above; do the same for repeated alternate bases
        if (!list.contains(allele)) list.add(allele);
    }
    return list;
}
@Override @Override
public boolean isReference() { public boolean isReference() {
if (mLength != 0) return false; if (mLength != 0) return false;
@ -149,11 +169,8 @@ public class BasicVariation implements Variation {
@Override @Override
public char getAlternativeBaseForSNP() { public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP"); if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (!this.isBiallelic() || this.getAlternateAlleleList().size() != 1) throw new IllegalStateException("we're not biallelic");
// we know that if we're a snp, the reference is a single base, so charAt(0) is safe return Utils.stringToChar(this.getAlternateAlleleList().get(0));
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0);
} }
/** /**
@ -164,11 +181,8 @@ public class BasicVariation implements Variation {
@Override @Override
public char getReferenceForSNP() { public char getReferenceForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP"); if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (!this.isBiallelic()) throw new IllegalStateException("we're not biallelic");
// we know that if we're a snp, the reference is a single base, so charAt(0) is safe return Utils.stringToChar(this.mRef);
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(0);
return getAlternateBases().charAt(1);
} }

View File

@ -14,14 +14,21 @@ import java.util.List;
public interface Variation { public interface Variation {
// the types of variants we currently allow // the types of variants we currently allow
public enum VARIANT_TYPE { public enum VARIANT_TYPE {
SNP, INDEL, REFERENCE // though reference is not really a variant SNP, INDEL, REFERENCE // though reference is not really a variant, we need to represent it
} }
/** are we bi-allelic? */
public boolean isBiallelic();
/** /**
* get the frequency of this variant, if we're a variant. If we're reference this method * get the frequency of this variant, if we're a variant. If we're reference this method
* should return 0. * should return 0. If we can't provide an alternate allele frequency, this should also
* return 0.
* *
* @return double with the stored frequency * WARNING: This method is only valid for biAllelic data, the contract is to check isBiallelic()
* before calling this method
*
* @return double the minor allele frequency
*/ */
public double getNonRefAlleleFrequency(); public double getNonRefAlleleFrequency();
@ -32,7 +39,8 @@ public interface Variation {
public VARIANT_TYPE getType(); public VARIANT_TYPE getType();
/** /**
* are we a SNP? If not we're a Indel/deletion or the reference * are we a SNP? If not we're a Indel/deletion or the reference. This method must be call before you use
* the convenience methods getAlternativeBaseForSNP or getReferenceForSNP, to ensure that you're working with a SNP
* *
* @return true if we're a SNP * @return true if we're a SNP
*/ */
@ -60,22 +68,26 @@ public interface Variation {
public boolean isReference(); public boolean isReference();
/** /**
* get the location that this Variant represents * are we an insertion or a deletion? yes, then return true. No? false.
*
* @return true if we're an insertion or deletion
*/
public boolean isIndel();
/**
* get the location of this Variant
* *
* @return a GenomeLoc * @return a GenomeLoc
*/ */
public GenomeLoc getLocation(); public GenomeLoc getLocation();
/** /**
* get the reference base(s) at this position * get the reference base(s) for this Variant
* *
* @return the reference base or bases, as a string * @return the reference base or bases, as a string
*/ */
public String getReference(); public String getReference();
/** are we bi-allelic? */
public boolean isBiallelic();
/** /**
* get the -1 * (log 10 of the error value) * get the -1 * (log 10 of the error value)
* *
@ -83,26 +95,26 @@ public interface Variation {
*/ */
public double getNegLog10PError(); public double getNegLog10PError();
/**
* gets the alternate base. Use this method if we're biallelic
*
* @return
*/
public String getAlternateBases();
/** /**
* gets the alternate bases. Use this method if the allele count is greater then 2 (not biallelic) * gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guarantee ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
* *
* @return * @return an alternate allele list
*/ */
public List<String> getAlternateBaseList(); public List<String> getAlternateAlleleList();
/** /**
* are we an insertion or a deletion? yes, then return true. No? false. * gets the alleles. This method should return all the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles. If the reference base is not an allele in this variation
* it will not be in the list (i.e. there is no guarantee that the reference base is in the list).
* *
* @return true if we're an insertion or deletion * @return an alternate allele list
*/ */
public boolean isIndel(); public List<String> getAlleleList();
/** /**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP * gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP

View File

@ -56,10 +56,10 @@ public class rodDbSNPTest extends BaseTest {
rodDbSNP var = (rodDbSNP)rod; rodDbSNP var = (rodDbSNP)rod;
if (rod.isSNP()) { if (rod.isSNP()) {
// quick check, if we're not triallelic, make sure the ref is right // quick check, if we're not triallelic, make sure the ref is right
if (var.getRefSnpFWD() == var.refBases.charAt(0) || var.getAltSnpFWD() == var.refBases.charAt(0)) if (var.getReferenceForSNP() == var.refBases.charAt(0) || var.getAlternativeBaseForSNP() == var.refBases.charAt(0))
// also make sure the ref is a single character // also make sure the ref is a single character
if (var.refBases.length() == 1) if (var.refBases.length() == 1)
Assert.assertTrue(var.refBases.charAt(0)==var.getRefSnpFWD()); Assert.assertTrue(var.refBases.charAt(0)==var.getReferenceForSNP());
if (var.getLocation().getContig().equals("1") && if (var.getLocation().getContig().equals("1") &&
var.getLocation().getStart() >= 10000000 && var.getLocation().getStart() >= 10000000 &&
var.getLocation().getStart() <= 11000000) { var.getLocation().getStart() <= 11000000) {

View File

@ -82,7 +82,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test @Test
public void testEvalVariantRODOutputViolations() { public void testEvalVariantRODOutputViolations() {
List<String> md5 = new ArrayList<String>(); List<String> md5 = new ArrayList<String>();
md5.add("ad2ca71dfa7e45f369380178c4f8e69f"); md5.add("d84e5b2a23ab1cf028145f09cd1e9f5b");
/** /**
* the above MD5 was calculated from running the following command: * the above MD5 was calculated from running the following command: