intermediate commit for some changes in the Variation system, so Eric can go ahead with his changes. Everything is pretty set, but the Variation interface could use a convenience method that joins all the alternate alleles.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1903 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-23 06:31:15 +00:00
parent 6c338eccb8
commit ad1fc511b1
23 changed files with 585 additions and 508 deletions

View File

@ -2,8 +2,9 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom
* chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0
@ -23,28 +24,6 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes,
public char getAltSnpFWD() throws IllegalStateException { return getAltBasesFWD().charAt(0); }
public boolean isReference() { return getVariationConfidence() < 0.01; }
/**
* gets the alternate base. Use this method if we're biallelic
*
* @return
*/
@Override
public String getAlternateBases() {
return getAltBasesFWD();
}
/**
* gets the alternate bases. Use this method if the allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> str = new ArrayList<String>();
str.add(this.getAltBasesFWD());
return str;
}
/**
* get the frequency of this variant
*
@ -120,6 +99,33 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes,
return this.getVariationConfidence();
}
/**
 * Gets the alternate alleles for this record, i.e. the alleles at this location excluding the
 * reference base. The list is built from the single string returned by getAltBasesFWD(), so it
 * always holds exactly one entry; no ordering by allele frequency is implied.
 *
 * @return a fixed-size, single-element list wrapping the forward-strand alternate bases
 */
@Override
public List<String> getAlternateAlleleList() {
    final String altBases = getAltBasesFWD();
    return Arrays.asList(altBases);
}
/**
 * Gets all the alleles present at this location, including the reference. The reference allele is
 * always the first entry, followed by the alternate alleles in the order getAlternateAlleleList()
 * supplies them.
 *
 * The result is accumulated in a fresh ArrayList: a list produced by Arrays.asList is fixed-size and
 * throws UnsupportedOperationException from addAll, so it cannot be used to collect the alleles.
 *
 * @return a new, mutable list holding the reference allele followed by the alternate alleles
 */
@Override
public List<String> getAlleleList() {
    List<String> alleles = new ArrayList<String>();
    alleles.add(this.getReference());
    alleles.addAll(getAlternateAlleleList());
    return alleles;
}
public int length() { return 1; }
// SNPCallFromGenotypes interface

View File

@ -161,10 +161,9 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0);
List<String> alleles = this.getAlternateAlleleList();
if (alleles.size() != 1) throw new StingException("We're not biAllelic()");
return Utils.stringToChar(alleles.get(0));
}
/**
@ -175,10 +174,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
@Override
public char getReferenceForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(0);
return getAlternateBases().charAt(1);
return Utils.stringToChar(getReference());
}
/**
@ -207,7 +203,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
* Get the nth best genotype (one based), i.e. to get the best genotype pass in 1,
* the second best 2, etdc.
*
* @param nthBest the nth best genotype to get
* @param nthBest the nth best genotype to get (1 based, NOT ZERO BASED)
*
* @return a GENOTYPE object representing the nth best genotype
*/
@ -253,28 +249,6 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
((VariableLengthCall) mRecord).getIndelLen1() < 0);
}
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return this.getBestGenotype(1).toString();
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
/**
* Returns minor allele frequency.
*
@ -310,6 +284,42 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
}
/**
 * Gets the alternate alleles of the best genotype: every distinct base in the genotype string that
 * differs from the reference. The order follows the first appearance of each base in the genotype
 * string; no ordering by allele frequency is implied.
 *
 * Each allele is reported once. A homozygous non-reference genotype (for example "AA" against a
 * different reference base) therefore yields a single-element list, which is what
 * getAlternativeBaseForSNP() requires in order to treat the site as bi-allelic.
 *
 * @return a new list of the distinct non-reference alleles; empty when the genotype is homozygous reference
 */
@Override
public List<String> getAlternateAlleleList() {
    LikelihoodObject.GENOTYPE genotype = getBestGenotype(1);
    List<String> ret = new ArrayList<String>();
    for (char c : genotype.toString().toCharArray()) {
        String allele = String.valueOf(c);
        // skip the reference base, and skip a base we have already recorded (homozygous calls)
        if (!allele.equals(this.getReference()) && !ret.contains(allele)) ret.add(allele);
    }
    return ret;
}
/**
 * Gets the alleles of the best genotype. When the genotype string contains the reference base, the
 * reference is placed first; it is omitted when the genotype does not contain it. The distinct
 * non-reference bases follow, in order of first appearance in the genotype string.
 *
 * Each allele is reported once, so a homozygous non-reference genotype yields one entry rather
 * than the same base twice.
 *
 * @return a new list of the distinct alleles in the best genotype, reference (if present) first
 */
@Override
public List<String> getAlleleList() {
    LikelihoodObject.GENOTYPE genotype = getBestGenotype(1);
    List<String> list = new ArrayList<String>();
    if (genotype.toString().contains(this.getReference())) list.add(this.getReference());
    for (char c : genotype.toString().toCharArray()) {
        String allele = String.valueOf(c);
        // the reference was handled above; only add alternates we have not seen yet
        if (c != Utils.stringToChar(getReference()) && !list.contains(allele))
            list.add(allele);
    }
    return list;
}
public int length() {
return 1;
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
@ -134,6 +135,40 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
return Math.abs(lodBtr);
}
/**
 * Gets the alternate alleles of the best genotype: every distinct base in bestGenotype that differs
 * from refBase. The order follows the first appearance of each base in the genotype string; no
 * ordering by allele frequency is implied.
 *
 * Each allele is reported once, so a homozygous non-reference genotype yields a single-element
 * list instead of the same base twice.
 *
 * @return a new list of the distinct non-reference alleles; empty when the genotype is homozygous reference
 */
@Override
public List<String> getAlternateAlleleList() {
    List<String> list = new ArrayList<String>();
    for (char base : bestGenotype.toCharArray()) {
        String allele = String.valueOf(base);
        // skip the reference base and any alternate already recorded
        if (base != refBase && !list.contains(allele))
            list.add(allele);
    }
    return list;
}
/**
 * Gets the alleles of the best genotype. When bestGenotype contains the reference, the reference is
 * placed first; it is omitted when the genotype does not contain it. The distinct non-reference
 * bases follow, in order of first appearance in the genotype string.
 *
 * Each allele is reported once, so a homozygous non-reference genotype yields one entry rather
 * than the same base twice.
 *
 * @return a new list of the distinct alleles in the best genotype, reference (if present) first
 */
@Override
public List<String> getAlleleList() {
    List<String> list = new ArrayList<String>();
    if (this.bestGenotype.contains(getReference())) list.add(getReference());
    for (char c : this.bestGenotype.toCharArray()) {
        String allele = String.valueOf(c);
        // the reference was handled above; only add alternates we have not seen yet
        if (c != Utils.stringToChar(getReference()) && !list.contains(allele))
            list.add(allele);
    }
    return list;
}
public String getRefBasesFWD() {
return String.format("%c", getRefSnpFWD());
}
@ -147,7 +182,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
}
public char getAltSnpFWD() throws IllegalStateException {
// both ref and bestGenotype have been uppercased, so it's safe to use ==
// both ref and bestGenotype have been uppercased, so it's safe to use ==
char c = (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
//System.out.printf("%s : %c and %c%n", bestGenotype, refBase, c);
return c;
@ -187,28 +222,6 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
return false;
}
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return this.bestGenotype;
}
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
public boolean isIndel() {
return false;
}

View File

@ -2,10 +2,13 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import java.util.*;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
@ -96,6 +99,34 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
return 4; // 1/10000 error
}
/**
* gets the alternate alleles. For other Variation implementations this returns the alleles present at the
* location, NOT including the reference base. This implementation cannot do that: it always throws,
* because (per the exception message) the reference is unknown for Hapmap data.
*
* @return never returns normally
* @throws StingException always
*/
@Override
public List<String> getAlternateAlleleList() {
throw new StingException("Hapmap is unable to provide an alternate allele list; the reference is unknown");
}
/**
 * Gets the alleles recorded for this site by splitting the feature string into one single-character
 * string per base, in the order the bases appear. No reference-first reordering and no
 * de-duplication is applied here.
 *
 * @return a new list with one entry for each character of feature
 */
@Override
public List<String> getAlleleList() {
    final char[] bases = feature.toCharArray();
    final List<String> alleles = new ArrayList<String>();
    for (int i = 0; i < bases.length; i++) {
        alleles.add(String.valueOf(bases[i]));
    }
    return alleles;
}
public String getAttribute(final String key) {
return attributes.get(key);
}
@ -181,28 +212,6 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
public char getAltSnpFWD() throws IllegalStateException { return 0; }
public boolean isReference() { return ! isSNP(); }
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public String getAlternateBases() {
return this.feature;
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
/**
* get the frequency of this variant
*

View File

@ -183,7 +183,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
/** are we bi-allelic? */
@Override
public boolean isBiallelic() {
return (this.getAlternateBaseList().size() == 1);
return (this.getAlternateAlleleList().size() == 1);
}
/**
@ -197,6 +197,37 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
return mCurrentRecord.getQual() / 10.0;
}
/**
 * Gets the alternate alleles of the current VCF record, i.e. the alleles at this location excluding
 * the reference base. Each alternate-allele encoding of the record is converted with toString(),
 * in the order the record supplies them; no ordering by allele frequency is implied.
 *
 * @return a new list with the string form of every alternate allele of the current record
 */
@Override
public List<String> getAlternateAlleleList() {
    final List<String> alternates = new ArrayList<String>();
    for (VCFGenotypeEncoding encoding : mCurrentRecord.getAlternateAlleles()) {
        final String allele = encoding.toString();
        alternates.add(allele);
    }
    return alternates;
}
/**
 * Gets all the alleles of the current VCF record. The record's reference base comes first,
 * followed by the alternate alleles in the order getAlternateAlleleList() returns them.
 *
 * @return a new list holding the reference allele followed by the alternate alleles
 */
@Override
public List<String> getAlleleList() {
    final String referenceAllele = String.valueOf(mCurrentRecord.getReferenceBase());
    final List<String> alleles = new ArrayList<String>();
    alleles.add(referenceAllele);
    alleles.addAll(getAlternateAlleleList());
    return alleles;
}
/**
* are we truely a variant, given a reference
*
@ -207,31 +238,6 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
return (!mCurrentRecord.hasAlternateAllele());
}
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public String getAlternateBases() {
if (!this.isBiallelic())
throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBaseList instead");
return this.mCurrentRecord.getAlternateAlleles().get(0).toString();
}
/**
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
for (VCFGenotypeEncoding enc : mCurrentRecord.getAlternateAlleles())
list.add(enc.toString());
return list;
}
/**
* are we an insertion or a deletion? yes, then return true. No? Well, false then.
*

View File

@ -1,9 +1,10 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
@ -100,23 +101,35 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
}
/**
* gets the alternate base. Use this method if we're biallelic
* gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guarantee ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
*
* @return
* @return an alternate allele list
*/
@Override
public String getAlternateBases() {
return this.get("3");
public List<String> getAlternateAlleleList() {
List<String> ret = new ArrayList<String>();
for (char c : get("3").toCharArray())
ret.add(String.valueOf(c));
return ret;
}
/**
* gets the alternate bases. Use this method if the allele count is greater then 2 (not biallelic)
* gets the alleles. This method should return all the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles.
*
* @return
* @return an alternate allele list
*/
@Override
public List<String> getAlternateBaseList() {
return Arrays.asList(this.get("3"));
public List<String> getAlleleList() {
List<String> ret = new ArrayList<String>();
ret.add(this.getReference());
for (char c : get("3").toCharArray())
ret.add(String.valueOf(c));
return ret;
}
public int length() { return 1; }

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -96,23 +97,31 @@ public class SequenomROD extends TabularROD implements Variation {
}
/**
* gets the alternate bases. Use this method if we're biallelic
* gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guarantee ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
*
* @return
* @return an alternate allele list
*/
@Override
public String getAlternateBases() {
return getAltBasesFWD();
public List<String> getAlternateAlleleList() {
List<String> ret = new ArrayList<String>();
for (char c: getAltBasesFWD().toCharArray())
ret.add(String.valueOf(c));
return ret;
}
/**
* gets the alternate bases. Use this method if the allele count is greater then 2 (not biallelic)
* gets the alleles. This method should return all the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles.
*
* @return
* @return an alternate allele list
*/
@Override
public List<String> getAlternateBaseList() {
throw new StingException("SequenomRod is not biallelic");
public List<String> getAlleleList() {
throw new StingException("SequenomRod doesn't know of the reference, and can't generate allele lists");
}
public boolean isHom() { return false; }

View File

@ -67,26 +67,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
public boolean isSNP() { return false; }
public boolean isReference() { return false; }
/**
* gets the alternate base. Use this method if we're biallelic
*
* @return
*/
@Override
public String getAlternateBases() {
return getFWDAlleles().get(0);
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
return getFWDAlleles();
}
public boolean isInsertion() {
if ( is1KGFormat() )
return this.get("3").equals("I");
@ -135,6 +115,35 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
return getVariationConfidence();
}
/**
 * Gets the alternate alleles: every entry of getAlleleList() that is not equal to the reference,
 * in the same order. No ordering by allele frequency is implied.
 *
 * The result is a separate list. Removing entries from the list returned by getAlleleList() while
 * iterating over it with for-each would raise ConcurrentModificationException (or
 * UnsupportedOperationException for a fixed-size list) and would also alter the caller-visible
 * allele list, so the non-reference alleles are copied instead.
 *
 * @return a new list of the non-reference alleles; empty when every allele equals the reference
 */
@Override
public List<String> getAlternateAlleleList() {
    // fully qualified: this file's import block is not guaranteed to include ArrayList
    List<String> ret = new java.util.ArrayList<String>();
    for (String val : getAlleleList()) {
        if (!val.equals(this.getReference())) ret.add(val);
    }
    return ret;
}
/**
 * Gets the alleles for this indel record. This is a direct delegate: the list produced by
 * getFWDAlleles() is handed back unchanged.
 *
 * @return the list returned by getFWDAlleles()
 */
@Override
public List<String> getAlleleList() {
    final List<String> forwardAlleles = this.getFWDAlleles();
    return forwardAlleles;
}
public boolean isHom() { return false; }
public boolean isHet() { return false; }
public double getHeterozygosity() { return 0.0; }

View File

@ -4,10 +4,12 @@ import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
@ -22,8 +24,8 @@ import java.util.List;
*/
public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype {
public GenomeLoc loc; // genome location of SNP
// Reference sequence chromosome or scaffold
// Start and stop positions in chrom
// Reference sequence chromosome or scaffold
// Start and stop positions in chrom
public String name; // Reference SNP identifier or Affy SNP name
public String strand; // Which DNA strand contains the observed alleles
@ -33,18 +35,18 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
public String molType; // Sample type from exemplar ss
public String varType; // The class of variant (simple, insertion, deletion, range, etc.)
// Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion'
// Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion'
public String validationStatus; // The validation status of the SNP
// one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap')
// one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap')
public double avHet; // The average heterozygosity from all observations
public double avHetSE; // The Standard Error for the average heterozygosity
public String func; // The functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.)
// set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5',
// 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5')
// set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5',
// 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5')
public String locType; // How the variant affects the reference sequence
// enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion')
// enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion')
public int weight; // The quality of the alignment
@ -73,7 +75,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/
@Override
public String getReference() {
return getRefBasesFWD();
return refBases;
}
/**
@ -86,58 +88,43 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
return 4; // -log10(0.0001)
}
/**
 * Gets the alternate alleles: every entry of getAlleleList() that is not equal to the reference,
 * in the same order. No ordering by allele frequency is implied.
 *
 * The non-reference alleles are copied into a new list. getAlleleList() returns a fixed-size list
 * backed by Arrays.asList, whose remove() throws UnsupportedOperationException, and removing
 * during a for-each iteration is unsafe in any case.
 *
 * @return a new list of the non-reference alleles; empty when every allele equals the reference
 */
public List<String> getAlternateAlleleList() {
    List<String> ret = new ArrayList<String>();
    for (String allele : getAlleleList()) {
        if (!allele.equals(getReference())) ret.add(allele);
    }
    return ret;
}
/**
 * Gets the observed alleles of this dbSNP record. The observed string is used as-is when the record
 * is on the forward strand and is reverse-complemented otherwise, then split on "/". If the
 * reference allele occurs in the result at a position other than the first, it is swapped into the
 * first position; when the reference is not among the observed alleles the order is left untouched.
 *
 * @return a fixed-size list (backed by Arrays.asList) of the observed alleles, reference first when present
 */
public List<String> getAlleleList() {
    final String observedAlleles = onFwdStrand() ? observed : SequenceUtil.reverseComplement(observed);
    final List<String> alleles = Arrays.asList(observedAlleles.split("/"));
    // ref first: an index above zero means the reference is present but not yet leading the list
    final int referenceIndex = alleles.indexOf(getReference());
    if (referenceIndex > 0) {
        Collections.swap(alleles, referenceIndex, 0);
    }
    return alleles;
}
public boolean onFwdStrand() {
return strand.equals("+");
}
/**
* Returns bases in the reference allele as a String. String can be empty (as in insertion into
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
* (for longer indels).
*
* @return reference allele, forward strand
*/
public String getRefBasesFWD() {
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
return (getAllelesFWD().get(0).equals(refBases)) ? getAllelesFWD().get(0) : getAllelesFWD().get(1);
//if ( onFwdStrand() )
// return refBases;
//else
// return SequenceUtil.reverseComplement(refBases);
}
/**
* Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* @return reference base on the forward strand
*/
public char getRefSnpFWD() throws IllegalStateException {
//System.out.printf("refbases is %s but %s%n", refBases, toString());
if (isIndel()) throw new IllegalStateException("Variant is not a SNP");
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
List<String> alleles = getAllelesFWD();
String val = (alleles.get(0).equals(refBases) ? alleles.get(0) : alleles.get(1));
return val.charAt(0);
// if ( onFwdStrand() ) return refBases.charAt(0);
// else return SequenceUtil.reverseComplement(refBases).charAt(0);
}
public List<String> getAllelesFWD() {
List<String> alleles = null;
if (onFwdStrand())
alleles = Arrays.asList(observed.split("/"));
else
alleles = Arrays.asList(SequenceUtil.reverseComplement(observed).split("/"));
//System.out.printf("getAlleles %s on %s %b => %s %n", observed, strand, onFwdStrand(), Utils.join("/", alleles));
return alleles;
}
public String getAllelesFWDString() {
return Utils.join("", getAllelesFWD());
}
/**
* get the frequency of this variant
*
@ -145,7 +132,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/
@Override
public double getNonRefAlleleFrequency() {
return 0; //To change body of implemented methods use File | Settings | File Templates.
return 0; // dbSNP doesn't know the allele frequency
}
/** @return the VARIANT_TYPE of the current variant */
@ -170,28 +157,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
return varType.contains("deletion");
}
/**
* get the base representation of this Variant
*
* @return a string, of ploidy
*/
@Override
public String getAlternateBases() {
return getAllelesFWDString();
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
public boolean isIndel() {
return isInsertion() || isDeletion() || varType.contains("in-del");
}
@ -204,11 +169,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/
@Override
public char getAlternativeBaseForSNP() {
return getAltSnpFWD(); /*
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
if (getAlternateBases().charAt(0) == this.getReference())
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0); */
if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc);
if (!isBiallelic()) throw new StingException("We're not biallelic; at position " + this.loc);
List<String> ret = this.getAlternateAlleleList();
if (ret.size() == 1 && ret.get(0).length() == 1)
return ret.get(0).charAt(0);
throw new StingException("getAlternativeBaseForSNP failed for DbSNP rod " + this.loc);
}
/**
@ -218,12 +184,14 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/
@Override
public char getReferenceForSNP() {
return 0; //To change body of implemented methods use File | Settings | File Templates.
if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc);
if (refBases.length() != 1) throw new StingException("The reference base in DbSNP must be zero, at position " + this.loc + " was " + refBases);
return refBases.charAt(0); // we know it's length 1, this is ok
}
public boolean isReference() {
return false;
} // snp locations are never "reference", there's always a variant
return false; // snp locations are never "reference", there's always a variant
}
public boolean isHapmap() {
return validationStatus.contains("by-hapmap");
@ -250,7 +218,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
}
public String toMediumString() {
String s = String.format("%s:%s:%s", getLocation().toString(), name, getAllelesFWDString());
String s = String.format("%s:%s:%s", getLocation().toString(), name, Utils.join("",this.getAlleleList()));
if (isSNP()) s += ":SNP";
if (isIndel()) s += ":Indel";
if (isHapmap()) s += ":Hapmap";
@ -300,49 +268,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
}
}
public String getAltBasesFWD() {
List<String> alleles = getAllelesFWD();
return (alleles.get(0).equals(refBases) ? alleles.get(1) : alleles.get(0));
}
public char getAltSnpFWD() throws IllegalStateException {
if (!isSNP())
throw new IllegalStateException("I'm not a SNP");
return getAltBasesFWD().charAt(0);
}
public double getConsensusConfidence() {
// TODO Auto-generated method stub
return Double.MAX_VALUE;
}
public List<String> getGenotype() throws IllegalStateException {
return Arrays.asList(Utils.join("", getAllelesFWD()));
}
public double getMAF() {
// Fixme: update to actually get MAF
//return avHet;
return -1;
}
public double getHeterozygosity() {
return avHet;
}
public int getPloidy() throws IllegalStateException {
// TODO Auto-generated method stub
return 0;
}
public double getVariationConfidence() {
// TODO Auto-generated method stub
return Double.MAX_VALUE;
}
public boolean isGenotype() {
// TODO Auto-generated method stub
return false;
return 2; // our DbSNP assumes a diploid human
}
public boolean isBiallelic() {
@ -350,10 +281,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
return observed.indexOf('/') == observed.lastIndexOf('/');
}
public int length() {
return (int) (loc.getStop() - loc.getStart() + 1);
}
/**
* get the genotype
*
@ -361,7 +288,10 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/
@Override
public org.broadinstitute.sting.utils.genotype.Genotype getCalledGenotype() {
return new BasicGenotype(this.getLocation(), this.getAltBasesFWD(), this.getRefSnpFWD(), this.getConsensusConfidence());
return new BasicGenotype(getLocation(),
BasicGenotype.alleleListToString(getAlleleList()),
Utils.stringToChar(getReference()),
getNegLog10PError());
}
/**
@ -371,11 +301,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/
@Override
public List<org.broadinstitute.sting.utils.genotype.Genotype> getGenotypes() {
List<org.broadinstitute.sting.utils.genotype.Genotype> list = new ArrayList<org.broadinstitute.sting.utils.genotype.Genotype>();
list.add(new BasicGenotype(this.getLocation(), this.getAltBasesFWD(), this.getRefSnpFWD(), this.getConsensusConfidence()));
ArrayList<Genotype> list = new ArrayList<Genotype>();
list.add(getCalledGenotype());
return list;
}
/**
* do we have the specified genotype? not all backedByGenotypes
* have all the genotype data.
@ -386,21 +317,21 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
*/
@Override
public boolean hasGenotype(DiploidGenotype x) {
return (!x.toString().equals(this.getAltBasesFWD())) ? false : true;
return (!x.toString().equals(BasicGenotype.alleleListToString(getAlleleList()))) ? false : true;
}
public static rodDbSNP getFirstRealSNP(RODRecordList<ReferenceOrderedDatum> dbsnpList) {
if ( dbsnpList == null )
return null;
if (dbsnpList == null)
return null;
rodDbSNP dbsnp = null;
for ( ReferenceOrderedDatum d : dbsnpList ) {
if ( ((rodDbSNP)d).isSNP() ) {
dbsnp = (rodDbSNP)d;
break;
}
}
rodDbSNP dbsnp = null;
for (ReferenceOrderedDatum d : dbsnpList) {
if (((rodDbSNP) d).isSNP()) {
dbsnp = (rodDbSNP) d;
break;
}
}
return dbsnp;
return dbsnp;
}
}

View File

@ -49,19 +49,20 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
continue;
// if we have multiple variants at a locus, just take the first damn one we see for now
Variation variant = (Variation) rod;
if (variant.getAlleleList().size() != 2) System.err.println("Not two " + Utils.join("-",variant.getAlleleList()));
if (!rod.getName().startsWith("snpmask") && variant.isDeletion()) {
deletionBasesRemaining = variant.getAlternateBases().length();
deletionBasesRemaining = variant.getAlleleList().get(0).length();
basesSeen++;
if (indelsWriter != null)
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlleleList().get(0).length()));
// delete the next n bases, not this one
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
} else if (!rod.getName().startsWith("snpmask") && variant.isInsertion()) {
basesSeen++;
if (indelsWriter != null)
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
basesSeen += variant.getAlternateBases().length();
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(variant.getAlternateBases()));
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlleleList().get(0).length()));
basesSeen += variant.getAlleleList().get(0).length();
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(Utils.join("",variant.getAlleleList())));
} else if (variant.isSNP()) {
basesSeen++;
return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : String.valueOf(variant.getAlternativeBaseForSNP())));

View File

@ -70,9 +70,9 @@ public class PickSequenomProbes extends RefWalker<String, String> {
if ( variant.isSNP() )
assay_sequence = leading_bases + "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" + trailing_bases;
else if ( variant.isInsertion() )
assay_sequence = leading_bases + refBase + "[-/" + variant.getAlternateBases() + "]" + trailing_bases;
assay_sequence = leading_bases + refBase + "[-/" + Utils.join("",variant.getAlleleList()) + "]" + trailing_bases;
else if ( variant.isDeletion() )
assay_sequence = leading_bases + refBase + "[" + variant.getAlternateBases() + "/-]" + trailing_bases.substring(variant.getAlternateBases().length());
// skip the deleted bases in the trailing sequence: the offset is the length of the joined
// allele string (the same text printed inside the brackets), not the number of entries
// in the allele list
assay_sequence = leading_bases + refBase + "[" + Utils.join("",variant.getAlleleList()) + "/-]" + trailing_bases.substring(Utils.join("",variant.getAlleleList()).length());
else
return "";

View File

@ -1,22 +1,22 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.Formatter;
import static java.lang.Math.log10;
import edu.mit.broad.picard.genotype.DiploidGenotype;
import net.sf.picard.PicardException;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import static java.lang.Math.log10;
import java.util.Arrays;
import java.util.Formatter;
import java.util.List;
public class PopPriorWalker extends LocusWalker<Integer, Integer> {
@ -174,7 +174,7 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
priors = getKnownSiteKnownFreqPriors(((byte)(upRef & 0xff)), knownAlleles, hapmap.getVarAlleleFreq());
} else if (dbsnpInfo != null && dbsnpInfo.isSNP()) {
List<String> knownAlleles = dbsnpInfo.getAllelesFWD();
List<String> knownAlleles = Arrays.asList(Utils.join("",dbsnpInfo.getAlleleList()));
priorType = "DBSNP";
rodString = "[DBSNP: " + dbsnpInfo.toMediumString() + "]";

View File

@ -68,7 +68,8 @@ public class IndelSubsets implements ConcordanceType {
// only deal with a valid indel
Variation indel = ( indel1 != null ? indel1 : indel2 );
int size = ( indel.getAlternateBases().length() <= sizeCutoff ? 0 : 1 );
// we only deal with the first allele
int size = ( indel.getAlternateAlleleList().get(0).length() <= sizeCutoff ? 0 : 1 );
int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 );
writers[set1][set2][size][homopol].println(indel.toString());
@ -80,7 +81,7 @@ public class IndelSubsets implements ConcordanceType {
GenomeLoc locus = ref.getLocus();
int refBasePos = (int)(locus.getStart() - window.getStart());
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAlternateBases().charAt(0);
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAlternateAlleleList().get(0).charAt(0);
int leftRun = 0;
for ( int i = refBasePos; i >= 0; i--) {
if ( bases[i] != indelBase )
@ -88,9 +89,9 @@ public class IndelSubsets implements ConcordanceType {
leftRun++;
}
indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateBases().length(),bases.length-1)] : indel.getAlternateBases().charAt(indel.getAlternateBases().length()-1);
indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateAlleleList().get(0).length(),bases.length-1)] : indel.getAlternateAlleleList().get(0).charAt(indel.getAlternateAlleleList().get(0).length()-1);
int rightRun = 0;
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateBases().length() : 1); i < bases.length; i++) {
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateAlleleList().get(0).length() : 1); i < bases.length; i++) {
if ( bases[i] != indelBase )
break;
rightRun++;

View File

@ -12,10 +12,9 @@ import java.util.List;
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
long insertions = 0;
@ -30,19 +29,20 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
if ( eval != null && eval.isInsertion() ) {
if ( eval.isInsertion() )
// gate on isIndel(): with an isInsertion() test here the isDeletion() branch and the
// sanity-check exception below can never be reached, so deletions are never counted
if (eval != null && eval.isIndel()) {
if (eval.isInsertion())
insertions++;
else if ( eval.isDeletion() )
else if (eval.isDeletion())
deletions++;
else
throw new RuntimeException("Variation is indel, but isn't insertion or deletion!");
if ( eval.getAlternateBases().length() < 100 ) {
sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBases().length()]++;
if ( eval.getAlternateBases().length() > maxSize )
maxSize = eval.getAlternateBases().length();
}
for (String allele : eval.getAlleleList())
if (allele.length() < 100) {
sizes[eval.isDeletion() ? 0 : 1][allele.length()]++;
if (allele.length() > maxSize)
maxSize = allele.length();
}
}
return null;
@ -56,7 +56,7 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
s.add("Size Distribution");
s.add("size\tdeletions\tinsertions");
for ( int i = 1; i <= maxSize; i++ )
for (int i = 1; i <= maxSize; i++)
s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i]));
return s;

View File

@ -201,7 +201,7 @@ class PooledConcordanceTable {
public boolean pooledCallIsRef(Variation eval, char ref) {
// code broken out for easy alteration when we start using pool-specific variations
return eval.getAlternateBases().equalsIgnoreCase((Utils.dupString(ref,2)));
return Utils.join("",eval.getAlleleList()).equalsIgnoreCase((Utils.dupString(ref,2)));
}
public int calculateNumFrequencyIndeces(int poolSize) {
@ -225,7 +225,7 @@ class PooledConcordanceTable {
for ( Variation eval : evals ) {
if ( mismatchingCalls(firstEval, eval, ref) ) {
// todo -- make this not a StingException but go to the log
throw new StingException("Tri-Allelic Position "+eval.getAlternateBases()+"/"+firstEval.getAlternateBases() + " Ref: "+ ref + " not supported");
throw new StingException("Tri-Allelic Position "+Utils.join("",eval.getAlleleList())+"/"+Utils.join("",firstEval.getAlleleList()) + " Ref: "+ ref + " not supported");
} else {
alternateFrequency += calledVariantFrequency(eval,ref);
}
@ -249,10 +249,10 @@ class PooledConcordanceTable {
public boolean mismatchingCalls(Variation eval, Variation chip, char ref) {
// eval and chip guaranteed to be non-null
char chipF = chip.getAlternateBases().charAt(0);
char chipS = chip.getAlternateBases().charAt(1);
char evalF = chip.getAlternateBases().charAt(0);
char evalS = chip.getAlternateBases().charAt(1);
char chipF = Utils.stringToChar(chip.getAlleleList().get(0));
char chipS = Utils.stringToChar(chip.getAlleleList().get(1));
char evalF = Utils.stringToChar(eval.getAlleleList().get(0));
char evalS = Utils.stringToChar(eval.getAlleleList().get(1));
boolean mismatch;
if (chipF == ref) {
if ( chipS == ref ) {
@ -277,7 +277,7 @@ class PooledConcordanceTable {
public double calledVariantFrequency( Variation var, char ref ) {
// code broken out for easy alteration when we start using pool-specific variations
String varStr = var.getAlternateBases();
String varStr = Utils.join("",var.getAlleleList());
double freq;
if ( varStr.charAt(0) != ref && varStr.charAt(1) != ref ) {
freq = (double) 2;

View File

@ -1,7 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.BrokenRODSimulator;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.ArrayList;
@ -90,9 +92,9 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
}
public boolean discordantP(Variation dbSNP, Variation eval) {
if (eval != null) {
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference().charAt(0);
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : Utils.stringToChar(eval.getReference());
if (dbSNP != null && dbSNP.isSNP())
return !dbSNP.getAlternateBases().contains(String.valueOf(alt));
return !dbSNP.getAlleleList().contains(String.valueOf(alt));
}
return false;
}
@ -125,7 +127,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) {
return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval);
} else if (dbsnp.isIndel() && eval.isSNP()) {
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval);
return String.format("SNP-at-indel DBSNP=%s %s", Utils.join("",dbsnp.getAlleleList()), eval);
} else {
return null;
}

View File

@ -81,7 +81,7 @@ public class GenotypeUtils {
if ( var instanceof Genotype )
return ((Genotype)var).isHet();
String genotype = var.getAlternateBases();
String genotype = Utils.join("",var.getAlleleList());
if ( genotype.length() < 1 )
return false;

View File

@ -2,16 +2,11 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.*;
import net.sf.samtools.util.StringUtil;
import net.sf.picard.reference.ReferenceSequenceFile;
import java.util.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.BufferedReader;
import org.apache.log4j.Logger;
import java.io.File;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: depristo
@ -20,9 +15,7 @@ import org.apache.log4j.Logger;
* To change this template use File | Settings | File Templates.
*/
public class Utils {
/**
* our log, which we want to capture anything from this class
*/
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(Utils.class);
public static void warnUser(final String msg) {
@ -45,25 +38,28 @@ public class Utils {
/**
* Compares two objects, either of which might be null.
*
* @param lhs One object to compare.
* @param rhs The other object to compare.
*
* @return True if the two objects are equal, false otherwise.
*/
public static boolean equals(Object lhs, Object rhs) {
    // a null left-hand side can only match a null right-hand side
    if (lhs == null) {
        return rhs == null;
    }
    // otherwise defer to the left-hand object's own notion of equality
    return lhs.equals(rhs);
}
/**
 * Build a new list made of one element followed by the contents of an existing list.
 * The input list is never modified.
 *
 * @param elt the element placed at the head of the result
 * @param l   the elements that follow; may be null, in which case the result holds only elt
 *
 * @return a freshly allocated list
 */
public static <T> List<T> cons(final T elt, final List<T> l) {
    final List<T> result = new ArrayList<T>();
    result.add(elt);
    if (l == null) {
        return result;
    }
    result.addAll(l);
    return result;
}
/**
* pretty print the warning message supplied
*
* @param message the message
*/
private static void prettyPrintWarningMessage(String message) {
@ -71,13 +67,13 @@ public class Utils {
while (builder.length() > 70) {
int space = builder.lastIndexOf(" ", 70);
if (space <= 0) space = 70;
logger.warn(String.format("* %s", builder.substring(0,space)));
builder.delete(0,space + 1);
logger.warn(String.format("* %s", builder.substring(0, space)));
builder.delete(0, space + 1);
}
logger.warn(String.format("* %s", builder));
}
public static SAMFileHeader copySAMFileHeader( SAMFileHeader toCopy ) {
public static SAMFileHeader copySAMFileHeader(SAMFileHeader toCopy) {
SAMFileHeader copy = new SAMFileHeader();
copy.setSortOrder(toCopy.getSortOrder());
@ -86,9 +82,9 @@ public class Utils {
copy.setReadGroups(toCopy.getReadGroups());
copy.setSequenceDictionary(toCopy.getSequenceDictionary());
for ( Map.Entry<String, Object> e : toCopy.getAttributes())
for (Map.Entry<String, Object> e : toCopy.getAttributes())
copy.setAttribute(e.getKey(), e.getValue());
return copy;
}
@ -104,6 +100,7 @@ public class Utils {
*
* @param pred filtering condition ( objects, for which pred.apply() is true pass the filter )
* @param c collection to filter (will not be modified)
*
* @return new list built from elements of <c> passing the filter
* @see #filterInPlace(Predicate pred, Collection c)
*/
@ -135,6 +132,7 @@ public class Utils {
*
* @param pred filtering condition (only elements, for which pred.apply() is true will be kept in the collection)
* @param c collection to filter (will be modified - should be mutable and should implement remove() )
*
* @return reference to the same (modified) collection <c>
* @see #filter(Predicate pred, Collection c)
*/
@ -175,12 +173,12 @@ public class Utils {
public static ArrayList<Byte> subseq(char[] fullArray) {
byte[] fullByteArray = new byte[fullArray.length];
StringUtil.charsToBytes(fullArray,0,fullArray.length,fullByteArray,0);
StringUtil.charsToBytes(fullArray, 0, fullArray.length, fullByteArray, 0);
return subseq(fullByteArray);
}
public static ArrayList<Byte> subseq(byte[] fullArray) {
return subseq(fullArray, 0, fullArray.length-1);
return subseq(fullArray, 0, fullArray.length - 1);
}
public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) {
@ -204,9 +202,9 @@ public class Utils {
public static boolean is454Read(SAMRecord read) {
SAMReadGroupRecord readGroup = read.getReadGroup();
if ( readGroup != null ) {
if (readGroup != null) {
Object readPlatformAttr = readGroup.getAttribute("PL");
if ( readPlatformAttr != null )
if (readPlatformAttr != null)
return readPlatformAttr.toString().toUpperCase().contains("454");
}
return false;
@ -248,7 +246,7 @@ public class Utils {
return "";
}
StringBuilder ret = new StringBuilder(strings[start]);
for (int i = start+1; i < end; ++i) {
for (int i = start + 1; i < end; ++i) {
ret.append(separator);
ret.append(strings[i]);
}
@ -332,13 +330,13 @@ public class Utils {
public static Integer[] SortPermutation(final double[] A) {
class comparator implements Comparator<Integer> {
public int compare(Integer a, Integer b) {
if (A[a.intValue()] < A[ b.intValue() ]) {
if (A[a.intValue()] < A[b.intValue()]) {
return -1;
}
if (A[ a.intValue() ] == A[ b.intValue() ]) {
if (A[a.intValue()] == A[b.intValue()]) {
return 0;
}
if (A[ a.intValue() ] > A[ b.intValue() ]) {
if (A[a.intValue()] > A[b.intValue()]) {
return 1;
}
return 0;
@ -401,8 +399,7 @@ public class Utils {
return output;
}
public static <T> List<T> PermuteList(List<T> list, Integer[] permutation)
{
public static <T> List<T> PermuteList(List<T> list, Integer[] permutation) {
List<T> output = new ArrayList<T>();
for (int i = 0; i < permutation.length; i++) {
output.add(list.get(permutation[i]));
@ -412,47 +409,59 @@ public class Utils {
/** Draw N random elements from list. */
public static <T> List<T> RandomSubset(List<T> list, int N)
{
if (list.size() <= N) { return list; }
public static <T> List<T> RandomSubset(List<T> list, int N) {
if (list.size() <= N) {
return list;
}
java.util.Random random = new java.util.Random();
int idx[] = new int[list.size()];
for (int i = 0; i < list.size(); i++) { idx[i] = random.nextInt(); }
for (int i = 0; i < list.size(); i++) {
idx[i] = random.nextInt();
}
Integer[] perm = SortPermutation(idx);
Integer[] perm = SortPermutation(idx);
List<T> ans = new ArrayList<T>();
for (int i = 0; i < N; i++) { ans.add(list.get(perm[i])); }
for (int i = 0; i < N; i++) {
ans.add(list.get(perm[i]));
}
return ans;
}
// lifted from the internet
// http://www.cs.princeton.edu/introcs/91float/Gamma.java.html
public static double logGamma(double x)
{
double tmp = (x - 0.5) * Math.log(x + 4.5) - (x + 4.5);
double ser = 1.0 + 76.18009173 / (x + 0) - 86.50532033 / (x + 1)
+ 24.01409822 / (x + 2) - 1.231739516 / (x + 3)
+ 0.00120858003 / (x + 4) - 0.00000536382 / (x + 5);
return tmp + Math.log(ser * Math.sqrt(2 * Math.PI));
public static double logGamma(double x) {
double tmp = (x - 0.5) * Math.log(x + 4.5) - (x + 4.5);
double ser = 1.0 + 76.18009173 / (x + 0) - 86.50532033 / (x + 1)
+ 24.01409822 / (x + 2) - 1.231739516 / (x + 3)
+ 0.00120858003 / (x + 4) - 0.00000536382 / (x + 5);
return tmp + Math.log(ser * Math.sqrt(2 * Math.PI));
}
public static double percentage(double x, double base) { return (base> 0 ? (x/base)*100.0 : 0); }
public static double percentage(int x, int base) { return (base> 0 ? ((double)x/(double)base)*100.0 : 0); }
public static double percentage(long x, long base) { return (base> 0 ? ((double)x/(double)base)*100.0 : 0); }
public static double percentage(double x, double base) {
return (base > 0 ? (x / base) * 100.0 : 0);
}
public static String dupString( char c, int nCopies ) {
public static double percentage(int x, int base) {
return (base > 0 ? ((double) x / (double) base) * 100.0 : 0);
}
public static double percentage(long x, long base) {
return (base > 0 ? ((double) x / (double) base) * 100.0 : 0);
}
public static String dupString(char c, int nCopies) {
char[] chars = new char[nCopies];
Arrays.fill(chars,c);
Arrays.fill(chars, c);
return new String(chars);
}
public static int countOccurrences(char c, String s) {
int count = 0;
for ( int i = 0; i < s.length(); i++ ) {
for (int i = 0; i < s.length(); i++) {
count += s.charAt(i) == c ? 1 : 0;
}
return count;
@ -460,17 +469,17 @@ public class Utils {
public static <T> int countOccurrences(T x, List<T> l) {
int count = 0;
for ( T y : l ) {
if ( x.equals(y) ) count++;
for (T y : l) {
if (x.equals(y)) count++;
}
return count;
}
public static byte listMaxByte(List<Byte> quals) {
if ( quals.size() == 0 ) return 0;
if (quals.size() == 0) return 0;
byte m = quals.get(0);
for ( byte b : quals ) {
for (byte b : quals) {
m = b > m ? b : m;
}
return m;
@ -479,67 +488,84 @@ public class Utils {
/**
 * Returns indices of all occurrences of the specified symbol in the string.
 *
 * @param s  the string to scan
 * @param ch the character to look for
 *
 * @return the zero-based positions at which ch occurs, in ascending order; an empty
 *         array when it does not occur
 */
public static int[] indexOfAll(String s, int ch) {
    // first pass: count the hits so the result can be sized exactly; a fixed 64-slot
    // buffer would overflow on strings holding more than 64 occurrences
    int hits = 0;
    for (int i = 0; i < s.length(); i++) {
        if (s.charAt(i) == ch) hits++;
    }
    // second pass: record the positions
    int[] pos = new int[hits];
    int z = 0;
    for (int i = 0; i < s.length(); i++) {
        if (s.charAt(i) == ch) pos[z++] = i;
    }
    return pos;
}
/**
 * Resize an integer array. When <code>newSize</code> equals the length of <code>orig</code>
 * nothing is allocated and <code>orig</code> itself is returned. Otherwise a new array of
 * <code>newSize</code> elements is returned: a shorter one holds the first
 * <code>newSize</code> elements of <code>orig</code>, a longer one holds all of
 * <code>orig</code> followed by zeros.
 *
 * @param orig    the array to resize
 * @param newSize the requested length
 *
 * @return <code>orig</code> when the length already matches, otherwise a new array
 */
public static int[] reallocate(int[] orig, int newSize) {
    if (orig.length == newSize) {
        return orig;
    }
    final int[] resized = new int[newSize];
    // copy only as many elements as both arrays can hold; the rest stays zero
    System.arraycopy(orig, 0, resized, 0, Math.min(orig.length, newSize));
    return resized;
}
/* TEST ME
public static void main(String[] argv) {
List<Integer> l1 = new LinkedList<Integer>();
List<Integer> l2 = new ArrayList<Integer>();
public static void main(String[] argv) {
List<Integer> l1 = new LinkedList<Integer>();
List<Integer> l2 = new ArrayList<Integer>();
l1.add(1);
l1.add(5);
l1.add(3);
l1.add(10);
l1.add(4);
l1.add(2);
l2.add(1);
l2.add(5);
l2.add(3);
l2.add(10);
l2.add(4);
l2.add(2);
l1.add(1);
l1.add(5);
l1.add(3);
l1.add(10);
l1.add(4);
l1.add(2);
l2.add(1);
l2.add(5);
l2.add(3);
l2.add(10);
l2.add(4);
l2.add(2);
Predicate<Integer> p = new Predicate<Integer>() {
public boolean apply(Integer i) {
return i > 2;
}
};
filterInPlace(p, l1);
filterInPlace(p, l2);
Predicate<Integer> p = new Predicate<Integer>() {
public boolean apply(Integer i) {
return i > 2;
}
};
filterInPlace(p, l1);
filterInPlace(p, l2);
for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i));
System.out.println();
for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i));
System.out.println();
for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i));
System.out.println();
for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i));
System.out.println();
}
*/
/**
 * Convert a one-character string into the char it contains.
 *
 * @param str the string; its length must be exactly one
 *
 * @return the single character of str
 * @throws IllegalArgumentException if str is empty or longer than one character
 */
public static char stringToChar(String str) {
    final boolean singleCharacter = (str.length() == 1);
    if (singleCharacter) {
        return str.charAt(0);
    }
    throw new IllegalArgumentException("String length must be one");
}
*/
}

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
/**
* @author aaron
@ -153,4 +155,17 @@ public class BasicGenotype implements Genotype {
if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant");
return new BasicVariation(this.getBases(), String.valueOf(mRef), this.getBases().length(), mLocation, mNegLog10PError);
}
/**
 * Concatenate a list of alleles, in list order and with no separator, into a single
 * genotype string.
 *
 * @param alleles the alleles to concatenate
 *
 * @return the alleles joined end to end; the empty string for an empty list
 */
public static String alleleListToString(List<String> alleles) {
    final StringBuilder genotype = new StringBuilder();
    for (String current : alleles) {
        genotype.append(current);
    }
    return genotype.toString();
}
}

View File

@ -1,9 +1,11 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import java.util.List;
import java.util.ArrayList;
import java.util.List;
/**
* User: aaron
@ -40,6 +42,7 @@ public class BasicVariation implements Variation {
public BasicVariation(String bases, String reference, int length, GenomeLoc location, double confidence) {
mBases = bases;
mRef = reference;
if (mRef.length() != 1) throw new StingException("The reference must be a single base");
mLength = length;
mLocation = location;
mConfidence = confidence;
@ -82,23 +85,6 @@ public class BasicVariation implements Variation {
return (mLength < 0);
}
@Override
public String getAlternateBases() {
return mBases;
}
/**
* gets the alternate bases. Use this method if teh allele count is greater then 2
*
* @return
*/
@Override
public List<String> getAlternateBaseList() {
List<String> list = new ArrayList<String>();
list.add(this.getAlternateBases());
return list;
}
@Override
public GenomeLoc getLocation() {
return mLocation;
@ -112,7 +98,7 @@ public class BasicVariation implements Variation {
/** are we bi-allelic? */
@Override
public boolean isBiallelic() {
return true;
return (getAlternateAlleleList().size() == 1);
}
@Override
@ -120,6 +106,40 @@ public class BasicVariation implements Variation {
return mConfidence;
}
/**
 * gets the alternate alleles: every base of this variation that differs from the reference
 * base, each as a one-character string, in the order the bases are stored. A non-reference
 * base that is stored more than once is listed once per occurrence. There is no guaranteed
 * ordering by frequency (i.e. the first alternate allele is not necessarily the most
 * frequent one).
 *
 * @return the alternate allele list, never including the reference base
 */
@Override
public List<String> getAlternateAlleleList() {
    final List<String> alternates = new ArrayList<String>();
    final char[] observed = this.mBases.toCharArray();
    for (int i = 0; i < observed.length; i++) {
        // skip anything matching the (single-character) reference
        if (observed[i] == Utils.stringToChar(mRef))
            continue;
        alternates.add(String.valueOf(observed[i]));
    }
    return alternates;
}
/**
 * gets the alleles. The reference base comes first, but only when it actually occurs among
 * the stored bases; it is followed by every stored base that differs from the reference,
 * each as a one-character string, in stored order.
 *
 * @return the allele list, led by the reference allele when it is present
 */
@Override
public List<String> getAlleleList() {
    final List<String> alleles = new ArrayList<String>();
    // the reference is only an allele here if it is one of the observed bases
    if (this.mBases.contains(mRef)) {
        alleles.add(mRef);
    }
    final char[] observed = this.mBases.toCharArray();
    for (int i = 0; i < observed.length; i++) {
        if (observed[i] != Utils.stringToChar(mRef)) {
            alleles.add(String.valueOf(observed[i]));
        }
    }
    return alleles;
}
@Override
public boolean isReference() {
if (mLength != 0) return false;
@ -149,11 +169,8 @@ public class BasicVariation implements Variation {
@Override
public char getAlternativeBaseForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
// we know that if we're a snp, the reference is a single base, so charAt(0) is safe
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(1);
return getAlternateBases().charAt(0);
if (!this.isBiallelic() || this.getAlternateAlleleList().size() != 1) throw new IllegalStateException("we're not biallelic");
return Utils.stringToChar(this.getAlternateAlleleList().get(0));
}
/**
@ -164,11 +181,8 @@ public class BasicVariation implements Variation {
@Override
public char getReferenceForSNP() {
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
// we know that if we're a snp, the reference is a single base, so charAt(0) is safe
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
return getAlternateBases().charAt(0);
return getAlternateBases().charAt(1);
if (!this.isBiallelic()) throw new IllegalStateException("we're not biallelic");
return Utils.stringToChar(this.mRef);
}

View File

@ -14,14 +14,21 @@ import java.util.List;
public interface Variation {
// the types of variants we currently allow
public enum VARIANT_TYPE {
SNP, INDEL, REFERENCE // though reference is not really a variant
SNP, INDEL, REFERENCE // though reference is not really a variant, we need to represent it
}
/** are we bi-allelic? */
public boolean isBiallelic();
/**
* get the frequency of this variant, if we're a variant. If we're reference this method
* should return 0.
* should return 0. If we can't provide an alternate allele frequency, this should also
* return 0.
*
* @return double with the stored frequency
* WARNING: This method is only valid for biAllelic data, the contract is to check isBiallelic()
* before calling this method
*
* @return double the minor allele frequency
*/
public double getNonRefAlleleFrequency();
@ -32,7 +39,8 @@ public interface Variation {
public VARIANT_TYPE getType();
/**
* are we a SNP? If not we're a Indel/deletion or the reference
* are we a SNP? If not we're an Indel/deletion or the reference. This method must be called before you use
* the convenience methods getAlternativeBaseForSNP or getReferenceForSNP, to ensure that you're working with a SNP
*
* @return true if we're a SNP
*/
@ -60,22 +68,26 @@ public interface Variation {
public boolean isReference();
/**
* get the location that this Variant represents
* are we an insertion or a deletion? yes, then return true. No? false.
*
* @return true if we're an insertion or deletion
*/
public boolean isIndel();
/**
* get the location of this Variant
*
* @return a GenomeLoc
*/
public GenomeLoc getLocation();
/**
* get the reference base(s) at this position
* get the reference base(s) for this Variant
*
* @return the reference base or bases, as a string
*/
public String getReference();
/** are we bi-allelic? */
public boolean isBiallelic();
/**
* get the -1 * (log 10 of the error value)
*
@ -83,26 +95,26 @@ public interface Variation {
*/
public double getNegLog10PError();
/**
* gets the alternate base. Use this method if we're biallelic
*
* @return
*/
public String getAlternateBases();
/**
* gets the alternate bases. Use this method if the allele count is greater then 2 (not biallelic)
* gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guaranteed ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
*
* @return
* @return an alternate allele list
*/
public List<String> getAlternateBaseList();
public List<String> getAlternateAlleleList();
/**
* are we an insertion or a deletion? yes, then return true. No? false.
* gets the alleles. This method should return all the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles. If the reference base is not an allele in this variation
* it will not be in the list (i.e. there is no guarantee that the reference base is in the list).
*
* @return true if we're an insertion or deletion
* @return the allele list, with the reference allele first when it is present
*/
public boolean isIndel();
public List<String> getAlleleList();
/**
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP

View File

@ -56,10 +56,10 @@ public class rodDbSNPTest extends BaseTest {
rodDbSNP var = (rodDbSNP)rod;
if (rod.isSNP()) {
// quick check, if we're not triallelic, make sure the ref is right
if (var.getRefSnpFWD() == var.refBases.charAt(0) || var.getAltSnpFWD() == var.refBases.charAt(0))
if (var.getReferenceForSNP() == var.refBases.charAt(0) || var.getAlternativeBaseForSNP() == var.refBases.charAt(0))
// also make sure the ref is a single character
if (var.refBases.length() == 1)
Assert.assertTrue(var.refBases.charAt(0)==var.getRefSnpFWD());
Assert.assertTrue(var.refBases.charAt(0)==var.getReferenceForSNP());
if (var.getLocation().getContig().equals("1") &&
var.getLocation().getStart() >= 10000000 &&
var.getLocation().getStart() <= 11000000) {

View File

@ -82,7 +82,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
@Test
public void testEvalVariantRODOutputViolations() {
List<String> md5 = new ArrayList<String>();
md5.add("ad2ca71dfa7e45f369380178c4f8e69f");
md5.add("d84e5b2a23ab1cf028145f09cd1e9f5b");
/**
* the above MD5 was calculated from running the following command: