intermediate commit for some changes in the Variation system, so Eric can go ahead with his changes. Everything is pretty set, but the Variation interface could use a convenience method that joins all the alternate alleles.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1903 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6c338eccb8
commit
ad1fc511b1
|
|
@ -2,8 +2,9 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom
|
||||
* chr1:1104840 A N 0.000000 -85.341265 -85.341265 0.000000 0.000000 324.000000 162 0 0
|
||||
|
|
@ -23,28 +24,6 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes,
|
|||
public char getAltSnpFWD() throws IllegalStateException { return getAltBasesFWD().charAt(0); }
|
||||
public boolean isReference() { return getVariationConfidence() < 0.01; }
|
||||
|
||||
/**
|
||||
* gets the alternate base. Use this method if we're biallelic
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return getAltBasesFWD();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if the allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
List<String> str = new ArrayList<String>();
|
||||
str.add(this.getAltBasesFWD());
|
||||
return str;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
|
|
@ -120,6 +99,33 @@ public class PooledEMSNPROD extends TabularROD implements SNPCallFromGenotypes,
|
|||
return this.getVariationConfidence();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateAlleleList() {
|
||||
return Arrays.asList(getAltBasesFWD());
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlleleList() {
|
||||
List<String> alleles = Arrays.asList(this.getReference());
|
||||
alleles.addAll(getAlternateAlleleList());
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public int length() { return 1; }
|
||||
|
||||
// SNPCallFromGenotypes interface
|
||||
|
|
|
|||
|
|
@ -161,10 +161,9 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBases().charAt(1);
|
||||
return getAlternateBases().charAt(0);
|
||||
|
||||
List<String> alleles = this.getAlternateAlleleList();
|
||||
if (alleles.size() != 1) throw new StingException("We're not biAllelic()");
|
||||
return Utils.stringToChar(alleles.get(0));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -175,10 +174,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
@Override
|
||||
public char getReferenceForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBases().charAt(0);
|
||||
return getAlternateBases().charAt(1);
|
||||
|
||||
return Utils.stringToChar(getReference());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -207,7 +203,7 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
* Get the nth best genotype (one based), i.e. to get the best genotype pass in 1,
|
||||
* the second best 2, etdc.
|
||||
*
|
||||
* @param nthBest the nth best genotype to get
|
||||
* @param nthBest the nth best genotype to get (1 based, NOT ZERO BASED)
|
||||
*
|
||||
* @return a GENOTYPE object representing the nth best genotype
|
||||
*/
|
||||
|
|
@ -253,28 +249,6 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
((VariableLengthCall) mRecord).getIndelLen1() < 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the base representation of this Variant
|
||||
*
|
||||
* @return a string, of ploidy
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return this.getBestGenotype(1).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBases());
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns minor allele frequency.
|
||||
*
|
||||
|
|
@ -310,6 +284,42 @@ public class RodGLF implements VariationRod, Iterator<RodGLF> {
|
|||
return Math.abs(getBestGenotypeValue(1) - ((SinglePointCall) mRecord).getLikelihoods()[index]) / GLFRecord.LIKELIHOOD_SCALE_FACTOR;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateAlleleList() {
|
||||
LikelihoodObject.GENOTYPE genotype = getBestGenotype(1);
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (char c : genotype.toString().toCharArray()) {
|
||||
if (!String.valueOf(c).equals(this.getReference())) ret.add(String.valueOf(c));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlleleList() {
|
||||
LikelihoodObject.GENOTYPE genotype = getBestGenotype(1);
|
||||
List<String> list = new ArrayList<String>();
|
||||
if (genotype.toString().contains(this.getReference())) list.add(this.getReference());
|
||||
for (char c : genotype.toString().toCharArray())
|
||||
if (c != Utils.stringToChar(getReference()))
|
||||
list.add(String.valueOf(c));
|
||||
return list;
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
|
|
@ -134,6 +135,40 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
return Math.abs(lodBtr);
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
for (char base : bestGenotype.toCharArray())
|
||||
if (base != refBase)
|
||||
list.add(String.valueOf(base));
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlleleList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
if (this.bestGenotype.contains(getReference())) list.add(getReference());
|
||||
for (char c : this.bestGenotype.toCharArray())
|
||||
if (c != Utils.stringToChar(getReference()))
|
||||
list.add(String.valueOf(c));
|
||||
return list;
|
||||
}
|
||||
|
||||
public String getRefBasesFWD() {
|
||||
return String.format("%c", getRefSnpFWD());
|
||||
}
|
||||
|
|
@ -147,7 +182,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
}
|
||||
|
||||
public char getAltSnpFWD() throws IllegalStateException {
|
||||
// both ref and bestGenotype have been uppercased, so it's safe to use ==
|
||||
// both ref and bestGenotype have been uppercased, so it's safe to use ==
|
||||
char c = (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
|
||||
//System.out.printf("%s : %c and %c%n", bestGenotype, refBase, c);
|
||||
return c;
|
||||
|
|
@ -187,28 +222,6 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the base representation of this Variant
|
||||
*
|
||||
* @return a string, of ploidy
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return this.bestGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBases());
|
||||
return list;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,10 +2,13 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
|
|
@ -96,6 +99,34 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
return 4; // 1/10000 error
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateAlleleList() {
|
||||
throw new StingException("Hapmap is unable to provide an alternate allele list; the reference is unknown");
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlleleList() {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (char c : feature.toCharArray())
|
||||
ret.add(String.valueOf(c));
|
||||
return ret;
|
||||
}
|
||||
|
||||
public String getAttribute(final String key) {
|
||||
return attributes.get(key);
|
||||
}
|
||||
|
|
@ -181,28 +212,6 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
public char getAltSnpFWD() throws IllegalStateException { return 0; }
|
||||
public boolean isReference() { return ! isSNP(); }
|
||||
|
||||
/**
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return this.feature;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBases());
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
/** are we bi-allelic? */
|
||||
@Override
|
||||
public boolean isBiallelic() {
|
||||
return (this.getAlternateBaseList().size() == 1);
|
||||
return (this.getAlternateAlleleList().size() == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -197,6 +197,37 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
return mCurrentRecord.getQual() / 10.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
for (VCFGenotypeEncoding enc : mCurrentRecord.getAlternateAlleles())
|
||||
list.add(enc.toString());
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlleleList() {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
ret.add(String.valueOf(mCurrentRecord.getReferenceBase()));
|
||||
ret.addAll(getAlternateAlleleList());
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* are we truely a variant, given a reference
|
||||
*
|
||||
|
|
@ -207,31 +238,6 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod,
|
|||
return (!mCurrentRecord.hasAlternateAllele());
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
if (!this.isBiallelic())
|
||||
throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBaseList instead");
|
||||
return this.mCurrentRecord.getAlternateAlleles().get(0).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
for (VCFGenotypeEncoding enc : mCurrentRecord.getAlternateAlleles())
|
||||
list.add(enc.toString());
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion or a deletion? yes, then return true. No? Well, false then.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import java.util.Arrays;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
||||
|
|
@ -100,23 +101,35 @@ public class SangerSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
|||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base. Use this method if we're biallelic
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return this.get("3");
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (char c : get("3").toCharArray())
|
||||
ret.add(String.valueOf(c));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if the allele count is greater then 2 (not biallelic)
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
return Arrays.asList(this.get("3"));
|
||||
public List<String> getAlleleList() {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
ret.add(this.getReference());
|
||||
for (char c : get("3").toCharArray())
|
||||
ret.add(String.valueOf(c));
|
||||
return ret;
|
||||
}
|
||||
|
||||
public int length() { return 1; }
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -96,23 +97,31 @@ public class SequenomROD extends TabularROD implements Variation {
|
|||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if we're biallelic
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return getAltBasesFWD();
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (char c: getAltBasesFWD().toCharArray())
|
||||
ret.add(String.valueOf(c));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if the allele count is greater then 2 (not biallelic)
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
throw new StingException("SequenomRod is not biallelic");
|
||||
public List<String> getAlleleList() {
|
||||
throw new StingException("SequenomRod doesn't know of the reference, and can't generate allele lists");
|
||||
}
|
||||
|
||||
public boolean isHom() { return false; }
|
||||
|
|
|
|||
|
|
@ -67,26 +67,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
public boolean isSNP() { return false; }
|
||||
public boolean isReference() { return false; }
|
||||
|
||||
/**
|
||||
* gets the alternate base. Use this method if we're biallelic
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return getFWDAlleles().get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
return getFWDAlleles();
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
if ( is1KGFormat() )
|
||||
return this.get("3").equals("I");
|
||||
|
|
@ -135,6 +115,35 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
return getVariationConfidence();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> ret = getAlleleList();
|
||||
for (String val : ret) {
|
||||
if (val.equals(this.getReference())) ret.remove(val);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlleleList() {
|
||||
return this.getFWDAlleles();
|
||||
}
|
||||
|
||||
public boolean isHom() { return false; }
|
||||
public boolean isHet() { return false; }
|
||||
public double getHeterozygosity() { return 0.0; }
|
||||
|
|
|
|||
|
|
@ -4,10 +4,12 @@ import net.sf.samtools.util.SequenceUtil;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -22,8 +24,8 @@ import java.util.List;
|
|||
*/
|
||||
public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype {
|
||||
public GenomeLoc loc; // genome location of SNP
|
||||
// Reference sequence chromosome or scaffold
|
||||
// Start and stop positions in chrom
|
||||
// Reference sequence chromosome or scaffold
|
||||
// Start and stop positions in chrom
|
||||
|
||||
public String name; // Reference SNP identifier or Affy SNP name
|
||||
public String strand; // Which DNA strand contains the observed alleles
|
||||
|
|
@ -33,18 +35,18 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
|
||||
public String molType; // Sample type from exemplar ss
|
||||
public String varType; // The class of variant (simple, insertion, deletion, range, etc.)
|
||||
// Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion'
|
||||
// Can be 'unknown','single','in-del','het','microsatellite','named','mixed','mnp','insertion','deletion'
|
||||
public String validationStatus; // The validation status of the SNP
|
||||
// one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap')
|
||||
// one of set('unknown','by-cluster','by-frequency','by-submitter','by-2hit-2allele','by-hapmap')
|
||||
|
||||
public double avHet; // The average heterozygosity from all observations
|
||||
public double avHetSE; // The Standard Error for the average heterozygosity
|
||||
|
||||
public String func; // The functional category of the SNP (coding-synon, coding-nonsynon, intron, etc.)
|
||||
// set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5',
|
||||
// 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5')
|
||||
// set('unknown','coding-synon','intron','cds-reference','near-gene-3','near-gene-5',
|
||||
// 'nonsense','missense','frameshift','untranslated-3','untranslated-5','splice-3','splice-5')
|
||||
public String locType; // How the variant affects the reference sequence
|
||||
// enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion')
|
||||
// enum('range','exact','between','rangeInsertion','rangeSubstitution','rangeDeletion')
|
||||
|
||||
public int weight; // The quality of the alignment
|
||||
|
||||
|
|
@ -73,7 +75,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
*/
|
||||
@Override
|
||||
public String getReference() {
|
||||
return getRefBasesFWD();
|
||||
return refBases;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -86,58 +88,43 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
return 4; // -log10(0.0001)
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> ret = getAlleleList();
|
||||
for (String allele : ret)
|
||||
if (allele.equals(getReference())) ret.remove(allele);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlleleList() {
|
||||
List<String> ret; //ref first!!!!!
|
||||
if (onFwdStrand())
|
||||
ret = Arrays.asList(observed.split("/"));
|
||||
else
|
||||
ret = Arrays.asList(SequenceUtil.reverseComplement(observed).split("/"));
|
||||
if (ret.size() > 0 && ret.contains(getReference()) && !ret.get(0).equals(this.getReference()))
|
||||
Collections.swap(ret,ret.indexOf(getReference()),0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
public boolean onFwdStrand() {
|
||||
return strand.equals("+");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns bases in the reference allele as a String. String can be empty (as in insertion into
|
||||
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
|
||||
* (for longer indels).
|
||||
*
|
||||
* @return reference allele, forward strand
|
||||
*/
|
||||
public String getRefBasesFWD() {
|
||||
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
|
||||
return (getAllelesFWD().get(0).equals(refBases)) ? getAllelesFWD().get(0) : getAllelesFWD().get(1);
|
||||
//if ( onFwdStrand() )
|
||||
// return refBases;
|
||||
//else
|
||||
// return SequenceUtil.reverseComplement(refBases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
|
||||
* if variant is not a SNP.
|
||||
*
|
||||
* @return reference base on the forward strand
|
||||
*/
|
||||
public char getRefSnpFWD() throws IllegalStateException {
|
||||
//System.out.printf("refbases is %s but %s%n", refBases, toString());
|
||||
if (isIndel()) throw new IllegalStateException("Variant is not a SNP");
|
||||
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
|
||||
List<String> alleles = getAllelesFWD();
|
||||
String val = (alleles.get(0).equals(refBases) ? alleles.get(0) : alleles.get(1));
|
||||
return val.charAt(0);
|
||||
// if ( onFwdStrand() ) return refBases.charAt(0);
|
||||
// else return SequenceUtil.reverseComplement(refBases).charAt(0);
|
||||
}
|
||||
|
||||
public List<String> getAllelesFWD() {
|
||||
List<String> alleles = null;
|
||||
if (onFwdStrand())
|
||||
alleles = Arrays.asList(observed.split("/"));
|
||||
else
|
||||
alleles = Arrays.asList(SequenceUtil.reverseComplement(observed).split("/"));
|
||||
|
||||
//System.out.printf("getAlleles %s on %s %b => %s %n", observed, strand, onFwdStrand(), Utils.join("/", alleles));
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public String getAllelesFWDString() {
|
||||
return Utils.join("", getAllelesFWD());
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
|
|
@ -145,7 +132,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
*/
|
||||
@Override
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
return 0; // dbSNP doesn't know the allele frequency
|
||||
}
|
||||
|
||||
/** @return the VARIANT_TYPE of the current variant */
|
||||
|
|
@ -170,28 +157,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
return varType.contains("deletion");
|
||||
}
|
||||
|
||||
/**
|
||||
* get the base representation of this Variant
|
||||
*
|
||||
* @return a string, of ploidy
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return getAllelesFWDString();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBases());
|
||||
return list;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return isInsertion() || isDeletion() || varType.contains("in-del");
|
||||
}
|
||||
|
|
@ -204,11 +169,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
*/
|
||||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
return getAltSnpFWD(); /*
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if (getAlternateBases().charAt(0) == this.getReference())
|
||||
return getAlternateBases().charAt(1);
|
||||
return getAlternateBases().charAt(0); */
|
||||
if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc);
|
||||
if (!isBiallelic()) throw new StingException("We're not biallelic; at position " + this.loc);
|
||||
List<String> ret = this.getAlternateAlleleList();
|
||||
if (ret.size() == 1 && ret.get(0).length() == 1)
|
||||
return ret.get(0).charAt(0);
|
||||
throw new StingException("getAlternativeBaseForSNP failed for DbSNP rod " + this.loc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -218,12 +184,14 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
*/
|
||||
@Override
|
||||
public char getReferenceForSNP() {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
if (!isSNP()) throw new StingException("We're not a SNP; called in DbSNP rod at position " + this.loc);
|
||||
if (refBases.length() != 1) throw new StingException("The reference base in DbSNP must be zero, at position " + this.loc + " was " + refBases);
|
||||
return refBases.charAt(0); // we know it's length 1, this is ok
|
||||
}
|
||||
|
||||
public boolean isReference() {
|
||||
return false;
|
||||
} // snp locations are never "reference", there's always a variant
|
||||
return false; // snp locations are never "reference", there's always a variant
|
||||
}
|
||||
|
||||
public boolean isHapmap() {
|
||||
return validationStatus.contains("by-hapmap");
|
||||
|
|
@ -250,7 +218,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
}
|
||||
|
||||
public String toMediumString() {
|
||||
String s = String.format("%s:%s:%s", getLocation().toString(), name, getAllelesFWDString());
|
||||
String s = String.format("%s:%s:%s", getLocation().toString(), name, Utils.join("",this.getAlleleList()));
|
||||
if (isSNP()) s += ":SNP";
|
||||
if (isIndel()) s += ":Indel";
|
||||
if (isHapmap()) s += ":Hapmap";
|
||||
|
|
@ -300,49 +268,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
}
|
||||
}
|
||||
|
||||
public String getAltBasesFWD() {
|
||||
List<String> alleles = getAllelesFWD();
|
||||
return (alleles.get(0).equals(refBases) ? alleles.get(1) : alleles.get(0));
|
||||
}
|
||||
|
||||
public char getAltSnpFWD() throws IllegalStateException {
|
||||
if (!isSNP())
|
||||
throw new IllegalStateException("I'm not a SNP");
|
||||
return getAltBasesFWD().charAt(0);
|
||||
}
|
||||
|
||||
public double getConsensusConfidence() {
|
||||
// TODO Auto-generated method stub
|
||||
return Double.MAX_VALUE;
|
||||
}
|
||||
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
return Arrays.asList(Utils.join("", getAllelesFWD()));
|
||||
}
|
||||
|
||||
public double getMAF() {
|
||||
// Fixme: update to actually get MAF
|
||||
//return avHet;
|
||||
return -1;
|
||||
}
|
||||
|
||||
public double getHeterozygosity() {
|
||||
return avHet;
|
||||
}
|
||||
|
||||
public int getPloidy() throws IllegalStateException {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
}
|
||||
|
||||
public double getVariationConfidence() {
|
||||
// TODO Auto-generated method stub
|
||||
return Double.MAX_VALUE;
|
||||
}
|
||||
|
||||
public boolean isGenotype() {
|
||||
// TODO Auto-generated method stub
|
||||
return false;
|
||||
return 2; // our DbSNP assumes a diploid human
|
||||
}
|
||||
|
||||
public boolean isBiallelic() {
|
||||
|
|
@ -350,10 +281,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
return observed.indexOf('/') == observed.lastIndexOf('/');
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return (int) (loc.getStop() - loc.getStart() + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotype
|
||||
*
|
||||
|
|
@ -361,7 +288,10 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
*/
|
||||
@Override
|
||||
public org.broadinstitute.sting.utils.genotype.Genotype getCalledGenotype() {
|
||||
return new BasicGenotype(this.getLocation(), this.getAltBasesFWD(), this.getRefSnpFWD(), this.getConsensusConfidence());
|
||||
return new BasicGenotype(getLocation(),
|
||||
BasicGenotype.alleleListToString(getAlleleList()),
|
||||
Utils.stringToChar(getReference()),
|
||||
getNegLog10PError());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -371,11 +301,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
*/
|
||||
@Override
|
||||
public List<org.broadinstitute.sting.utils.genotype.Genotype> getGenotypes() {
|
||||
List<org.broadinstitute.sting.utils.genotype.Genotype> list = new ArrayList<org.broadinstitute.sting.utils.genotype.Genotype>();
|
||||
list.add(new BasicGenotype(this.getLocation(), this.getAltBasesFWD(), this.getRefSnpFWD(), this.getConsensusConfidence()));
|
||||
ArrayList<Genotype> list = new ArrayList<Genotype>();
|
||||
list.add(getCalledGenotype());
|
||||
return list;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* do we have the specified genotype? not all backedByGenotypes
|
||||
* have all the genotype data.
|
||||
|
|
@ -386,21 +317,21 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements VariationRod
|
|||
*/
|
||||
@Override
|
||||
public boolean hasGenotype(DiploidGenotype x) {
|
||||
return (!x.toString().equals(this.getAltBasesFWD())) ? false : true;
|
||||
return (!x.toString().equals(BasicGenotype.alleleListToString(getAlleleList()))) ? false : true;
|
||||
}
|
||||
|
||||
public static rodDbSNP getFirstRealSNP(RODRecordList<ReferenceOrderedDatum> dbsnpList) {
|
||||
if ( dbsnpList == null )
|
||||
return null;
|
||||
if (dbsnpList == null)
|
||||
return null;
|
||||
|
||||
rodDbSNP dbsnp = null;
|
||||
for ( ReferenceOrderedDatum d : dbsnpList ) {
|
||||
if ( ((rodDbSNP)d).isSNP() ) {
|
||||
dbsnp = (rodDbSNP)d;
|
||||
break;
|
||||
}
|
||||
}
|
||||
rodDbSNP dbsnp = null;
|
||||
for (ReferenceOrderedDatum d : dbsnpList) {
|
||||
if (((rodDbSNP) d).isSNP()) {
|
||||
dbsnp = (rodDbSNP) d;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return dbsnp;
|
||||
return dbsnp;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,19 +49,20 @@ public class FastaAlternateReferenceWalker extends FastaReferenceWalker {
|
|||
continue;
|
||||
// if we have multiple variants at a locus, just take the first damn one we see for now
|
||||
Variation variant = (Variation) rod;
|
||||
if (variant.getAlleleList().size() != 2) System.err.println("Not two " + Utils.join("-",variant.getAlleleList()));
|
||||
if (!rod.getName().startsWith("snpmask") && variant.isDeletion()) {
|
||||
deletionBasesRemaining = variant.getAlternateBases().length();
|
||||
deletionBasesRemaining = variant.getAlleleList().get(0).length();
|
||||
basesSeen++;
|
||||
if (indelsWriter != null)
|
||||
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
|
||||
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlleleList().get(0).length()));
|
||||
// delete the next n bases, not this one
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||
} else if (!rod.getName().startsWith("snpmask") && variant.isInsertion()) {
|
||||
basesSeen++;
|
||||
if (indelsWriter != null)
|
||||
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlternateBases().length()));
|
||||
basesSeen += variant.getAlternateBases().length();
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(variant.getAlternateBases()));
|
||||
indelsWriter.println(fasta.getCurrentID() + ":" + basesSeen + "-" + (basesSeen + variant.getAlleleList().get(0).length()));
|
||||
basesSeen += variant.getAlleleList().get(0).length();
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase.concat(Utils.join("",variant.getAlleleList())));
|
||||
} else if (variant.isSNP()) {
|
||||
basesSeen++;
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), (rod.getName().startsWith("snpmask") ? "N" : String.valueOf(variant.getAlternativeBaseForSNP())));
|
||||
|
|
|
|||
|
|
@ -70,9 +70,9 @@ public class PickSequenomProbes extends RefWalker<String, String> {
|
|||
if ( variant.isSNP() )
|
||||
assay_sequence = leading_bases + "[" + refBase + "/" + variant.getAlternativeBaseForSNP() + "]" + trailing_bases;
|
||||
else if ( variant.isInsertion() )
|
||||
assay_sequence = leading_bases + refBase + "[-/" + variant.getAlternateBases() + "]" + trailing_bases;
|
||||
assay_sequence = leading_bases + refBase + "[-/" + Utils.join("",variant.getAlleleList()) + "]" + trailing_bases;
|
||||
else if ( variant.isDeletion() )
|
||||
assay_sequence = leading_bases + refBase + "[" + variant.getAlternateBases() + "/-]" + trailing_bases.substring(variant.getAlternateBases().length());
|
||||
assay_sequence = leading_bases + refBase + "[" + Utils.join("",variant.getAlleleList()) + "/-]" + trailing_bases.substring(variant.getAlleleList().size());
|
||||
else
|
||||
return "";
|
||||
|
||||
|
|
|
|||
|
|
@ -1,22 +1,22 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Formatter;
|
||||
import static java.lang.Math.log10;
|
||||
|
||||
import edu.mit.broad.picard.genotype.DiploidGenotype;
|
||||
import net.sf.picard.PicardException;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import java.util.Arrays;
|
||||
import java.util.Formatter;
|
||||
import java.util.List;
|
||||
|
||||
public class PopPriorWalker extends LocusWalker<Integer, Integer> {
|
||||
|
||||
|
|
@ -174,7 +174,7 @@ public class PopPriorWalker extends LocusWalker<Integer, Integer> {
|
|||
|
||||
priors = getKnownSiteKnownFreqPriors(((byte)(upRef & 0xff)), knownAlleles, hapmap.getVarAlleleFreq());
|
||||
} else if (dbsnpInfo != null && dbsnpInfo.isSNP()) {
|
||||
List<String> knownAlleles = dbsnpInfo.getAllelesFWD();
|
||||
List<String> knownAlleles = Arrays.asList(Utils.join("",dbsnpInfo.getAlleleList()));
|
||||
priorType = "DBSNP";
|
||||
rodString = "[DBSNP: " + dbsnpInfo.toMediumString() + "]";
|
||||
|
||||
|
|
|
|||
|
|
@ -68,7 +68,8 @@ public class IndelSubsets implements ConcordanceType {
|
|||
// only deal with a valid indel
|
||||
Variation indel = ( indel1 != null ? indel1 : indel2 );
|
||||
|
||||
int size = ( indel.getAlternateBases().length() <= sizeCutoff ? 0 : 1 );
|
||||
// we only deal with the first allele
|
||||
int size = ( indel.getAlternateAlleleList().get(0).length() <= sizeCutoff ? 0 : 1 );
|
||||
int homopol = ( homopolymerRunSize(ref, indel) <= homopolymerCutoff ? 0 : 1 );
|
||||
|
||||
writers[set1][set2][size][homopol].println(indel.toString());
|
||||
|
|
@ -80,7 +81,7 @@ public class IndelSubsets implements ConcordanceType {
|
|||
GenomeLoc locus = ref.getLocus();
|
||||
|
||||
int refBasePos = (int)(locus.getStart() - window.getStart());
|
||||
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAlternateBases().charAt(0);
|
||||
char indelBase = indel.isDeletion() ? bases[refBasePos+1] : indel.getAlternateAlleleList().get(0).charAt(0);
|
||||
int leftRun = 0;
|
||||
for ( int i = refBasePos; i >= 0; i--) {
|
||||
if ( bases[i] != indelBase )
|
||||
|
|
@ -88,9 +89,9 @@ public class IndelSubsets implements ConcordanceType {
|
|||
leftRun++;
|
||||
}
|
||||
|
||||
indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateBases().length(),bases.length-1)] : indel.getAlternateBases().charAt(indel.getAlternateBases().length()-1);
|
||||
indelBase = indel.isDeletion() ? bases[Math.min(refBasePos+indel.getAlternateAlleleList().get(0).length(),bases.length-1)] : indel.getAlternateAlleleList().get(0).charAt(indel.getAlternateAlleleList().get(0).length()-1);
|
||||
int rightRun = 0;
|
||||
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateBases().length() : 1); i < bases.length; i++) {
|
||||
for ( int i = refBasePos + (indel.isDeletion() ? 1+indel.getAlternateAlleleList().get(0).length() : 1); i < bases.length; i++) {
|
||||
if ( bases[i] != indelBase )
|
||||
break;
|
||||
rightRun++;
|
||||
|
|
|
|||
|
|
@ -12,10 +12,9 @@ import java.util.List;
|
|||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
public class IndelMetricsAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis {
|
||||
long insertions = 0;
|
||||
|
|
@ -30,19 +29,20 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
|
|||
}
|
||||
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
if ( eval != null && eval.isInsertion() ) {
|
||||
if ( eval.isInsertion() )
|
||||
if (eval != null && eval.isInsertion()) {
|
||||
if (eval.isInsertion())
|
||||
insertions++;
|
||||
else if ( eval.isDeletion() )
|
||||
else if (eval.isDeletion())
|
||||
deletions++;
|
||||
else
|
||||
throw new RuntimeException("Variation is indel, but isn't insertion or deletion!");
|
||||
|
||||
if ( eval.getAlternateBases().length() < 100 ) {
|
||||
sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBases().length()]++;
|
||||
if ( eval.getAlternateBases().length() > maxSize )
|
||||
maxSize = eval.getAlternateBases().length();
|
||||
}
|
||||
for (String allele : eval.getAlleleList())
|
||||
if (allele.length() < 100) {
|
||||
sizes[eval.isDeletion() ? 0 : 1][allele.length()]++;
|
||||
if (allele.length() > maxSize)
|
||||
maxSize = allele.length();
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
@ -56,7 +56,7 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
|
|||
|
||||
s.add("Size Distribution");
|
||||
s.add("size\tdeletions\tinsertions");
|
||||
for ( int i = 1; i <= maxSize; i++ )
|
||||
for (int i = 1; i <= maxSize; i++)
|
||||
s.add(String.format("%d\t%d\t\t%d", i, sizes[0][i], sizes[1][i]));
|
||||
|
||||
return s;
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ class PooledConcordanceTable {
|
|||
|
||||
public boolean pooledCallIsRef(Variation eval, char ref) {
|
||||
// code broken out for easy alteration when we start using pool-specific variations
|
||||
return eval.getAlternateBases().equalsIgnoreCase((Utils.dupString(ref,2)));
|
||||
return Utils.join("",eval.getAlleleList()).equalsIgnoreCase((Utils.dupString(ref,2)));
|
||||
}
|
||||
|
||||
public int calculateNumFrequencyIndeces(int poolSize) {
|
||||
|
|
@ -225,7 +225,7 @@ class PooledConcordanceTable {
|
|||
for ( Variation eval : evals ) {
|
||||
if ( mismatchingCalls(firstEval, eval, ref) ) {
|
||||
// todo -- make this not a StingException but go to the log
|
||||
throw new StingException("Tri-Allelic Position "+eval.getAlternateBases()+"/"+firstEval.getAlternateBases() + " Ref: "+ ref + " not supported");
|
||||
throw new StingException("Tri-Allelic Position "+Utils.join("",eval.getAlleleList())+"/"+Utils.join("",firstEval.getAlleleList()) + " Ref: "+ ref + " not supported");
|
||||
} else {
|
||||
alternateFrequency += calledVariantFrequency(eval,ref);
|
||||
}
|
||||
|
|
@ -249,10 +249,10 @@ class PooledConcordanceTable {
|
|||
|
||||
public boolean mismatchingCalls(Variation eval, Variation chip, char ref) {
|
||||
// eval and chip guaranteed to be non-null
|
||||
char chipF = chip.getAlternateBases().charAt(0);
|
||||
char chipS = chip.getAlternateBases().charAt(1);
|
||||
char evalF = chip.getAlternateBases().charAt(0);
|
||||
char evalS = chip.getAlternateBases().charAt(1);
|
||||
char chipF = Utils.stringToChar(chip.getAlleleList().get(0));
|
||||
char chipS = Utils.stringToChar(chip.getAlleleList().get(1));
|
||||
char evalF = Utils.stringToChar(eval.getAlleleList().get(0));
|
||||
char evalS = Utils.stringToChar(eval.getAlleleList().get(1));
|
||||
boolean mismatch;
|
||||
if (chipF == ref) {
|
||||
if ( chipS == ref ) {
|
||||
|
|
@ -277,7 +277,7 @@ class PooledConcordanceTable {
|
|||
|
||||
public double calledVariantFrequency( Variation var, char ref ) {
|
||||
// code broken out for easy alteration when we start using pool-specific variations
|
||||
String varStr = var.getAlternateBases();
|
||||
String varStr = Utils.join("",var.getAlleleList());
|
||||
double freq;
|
||||
if ( varStr.charAt(0) != ref && varStr.charAt(1) != ref ) {
|
||||
freq = (double) 2;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.BrokenRODSimulator;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -90,9 +92,9 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
|||
}
|
||||
public boolean discordantP(Variation dbSNP, Variation eval) {
|
||||
if (eval != null) {
|
||||
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference().charAt(0);
|
||||
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : Utils.stringToChar(eval.getReference());
|
||||
if (dbSNP != null && dbSNP.isSNP())
|
||||
return !dbSNP.getAlternateBases().contains(String.valueOf(alt));
|
||||
return !dbSNP.getAlleleList().contains(String.valueOf(alt));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
@ -125,7 +127,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
|||
if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) {
|
||||
return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval);
|
||||
} else if (dbsnp.isIndel() && eval.isSNP()) {
|
||||
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval);
|
||||
return String.format("SNP-at-indel DBSNP=%s %s", Utils.join("",dbsnp.getAlleleList()), eval);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ public class GenotypeUtils {
|
|||
if ( var instanceof Genotype )
|
||||
return ((Genotype)var).isHet();
|
||||
|
||||
String genotype = var.getAlternateBases();
|
||||
String genotype = Utils.join("",var.getAlleleList());
|
||||
if ( genotype.length() < 1 )
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -2,16 +2,11 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileReader;
|
||||
import java.io.BufferedReader;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
|
|
@ -20,9 +15,7 @@ import org.apache.log4j.Logger;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class Utils {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(Utils.class);
|
||||
|
||||
public static void warnUser(final String msg) {
|
||||
|
|
@ -45,25 +38,28 @@ public class Utils {
|
|||
|
||||
/**
|
||||
* Compares two objects, either of which might be null.
|
||||
*
|
||||
* @param lhs One object to compare.
|
||||
* @param rhs The other object to compare.
|
||||
*
|
||||
* @return True if the two objects are equal, false otherwise.
|
||||
*/
|
||||
public static boolean equals(Object lhs, Object rhs) {
|
||||
if( lhs == null && rhs == null ) return true;
|
||||
else if( lhs == null ) return false;
|
||||
if (lhs == null && rhs == null) return true;
|
||||
else if (lhs == null) return false;
|
||||
else return lhs.equals(rhs);
|
||||
}
|
||||
|
||||
public static <T> List<T> cons(final T elt, final List<T> l) {
|
||||
List<T> l2 = new ArrayList<T>();
|
||||
l2.add(elt);
|
||||
if ( l != null ) l2.addAll(l);
|
||||
if (l != null) l2.addAll(l);
|
||||
return l2;
|
||||
}
|
||||
|
||||
/**
|
||||
* pretty print the warning message supplied
|
||||
*
|
||||
* @param message the message
|
||||
*/
|
||||
private static void prettyPrintWarningMessage(String message) {
|
||||
|
|
@ -71,13 +67,13 @@ public class Utils {
|
|||
while (builder.length() > 70) {
|
||||
int space = builder.lastIndexOf(" ", 70);
|
||||
if (space <= 0) space = 70;
|
||||
logger.warn(String.format("* %s", builder.substring(0,space)));
|
||||
builder.delete(0,space + 1);
|
||||
logger.warn(String.format("* %s", builder.substring(0, space)));
|
||||
builder.delete(0, space + 1);
|
||||
}
|
||||
logger.warn(String.format("* %s", builder));
|
||||
}
|
||||
|
||||
public static SAMFileHeader copySAMFileHeader( SAMFileHeader toCopy ) {
|
||||
public static SAMFileHeader copySAMFileHeader(SAMFileHeader toCopy) {
|
||||
SAMFileHeader copy = new SAMFileHeader();
|
||||
|
||||
copy.setSortOrder(toCopy.getSortOrder());
|
||||
|
|
@ -86,9 +82,9 @@ public class Utils {
|
|||
copy.setReadGroups(toCopy.getReadGroups());
|
||||
copy.setSequenceDictionary(toCopy.getSequenceDictionary());
|
||||
|
||||
for ( Map.Entry<String, Object> e : toCopy.getAttributes())
|
||||
for (Map.Entry<String, Object> e : toCopy.getAttributes())
|
||||
copy.setAttribute(e.getKey(), e.getValue());
|
||||
|
||||
|
||||
return copy;
|
||||
}
|
||||
|
||||
|
|
@ -104,6 +100,7 @@ public class Utils {
|
|||
*
|
||||
* @param pred filtering condition ( objects, for which pred.apply() is true pass the filter )
|
||||
* @param c collection to filter (will not be modified)
|
||||
*
|
||||
* @return new list built from elements of <c> passing the filter
|
||||
* @see #filterInPlace(Predicate pred, Collection c)
|
||||
*/
|
||||
|
|
@ -135,6 +132,7 @@ public class Utils {
|
|||
*
|
||||
* @param pred filtering condition (only elements, for which pred.apply() is true will be kept in the collection)
|
||||
* @param c collection to filter (will be modified - should be mutable and should implement remove() )
|
||||
*
|
||||
* @return reference to the same (modified) collection <c>
|
||||
* @see #filter(Predicate pred, Collection c)
|
||||
*/
|
||||
|
|
@ -175,12 +173,12 @@ public class Utils {
|
|||
|
||||
public static ArrayList<Byte> subseq(char[] fullArray) {
|
||||
byte[] fullByteArray = new byte[fullArray.length];
|
||||
StringUtil.charsToBytes(fullArray,0,fullArray.length,fullByteArray,0);
|
||||
StringUtil.charsToBytes(fullArray, 0, fullArray.length, fullByteArray, 0);
|
||||
return subseq(fullByteArray);
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> subseq(byte[] fullArray) {
|
||||
return subseq(fullArray, 0, fullArray.length-1);
|
||||
return subseq(fullArray, 0, fullArray.length - 1);
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) {
|
||||
|
|
@ -204,9 +202,9 @@ public class Utils {
|
|||
|
||||
public static boolean is454Read(SAMRecord read) {
|
||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
if ( readGroup != null ) {
|
||||
if (readGroup != null) {
|
||||
Object readPlatformAttr = readGroup.getAttribute("PL");
|
||||
if ( readPlatformAttr != null )
|
||||
if (readPlatformAttr != null)
|
||||
return readPlatformAttr.toString().toUpperCase().contains("454");
|
||||
}
|
||||
return false;
|
||||
|
|
@ -248,7 +246,7 @@ public class Utils {
|
|||
return "";
|
||||
}
|
||||
StringBuilder ret = new StringBuilder(strings[start]);
|
||||
for (int i = start+1; i < end; ++i) {
|
||||
for (int i = start + 1; i < end; ++i) {
|
||||
ret.append(separator);
|
||||
ret.append(strings[i]);
|
||||
}
|
||||
|
|
@ -332,13 +330,13 @@ public class Utils {
|
|||
public static Integer[] SortPermutation(final double[] A) {
|
||||
class comparator implements Comparator<Integer> {
|
||||
public int compare(Integer a, Integer b) {
|
||||
if (A[a.intValue()] < A[ b.intValue() ]) {
|
||||
if (A[a.intValue()] < A[b.intValue()]) {
|
||||
return -1;
|
||||
}
|
||||
if (A[ a.intValue() ] == A[ b.intValue() ]) {
|
||||
if (A[a.intValue()] == A[b.intValue()]) {
|
||||
return 0;
|
||||
}
|
||||
if (A[ a.intValue() ] > A[ b.intValue() ]) {
|
||||
if (A[a.intValue()] > A[b.intValue()]) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
|
|
@ -401,8 +399,7 @@ public class Utils {
|
|||
return output;
|
||||
}
|
||||
|
||||
public static <T> List<T> PermuteList(List<T> list, Integer[] permutation)
|
||||
{
|
||||
public static <T> List<T> PermuteList(List<T> list, Integer[] permutation) {
|
||||
List<T> output = new ArrayList<T>();
|
||||
for (int i = 0; i < permutation.length; i++) {
|
||||
output.add(list.get(permutation[i]));
|
||||
|
|
@ -412,47 +409,59 @@ public class Utils {
|
|||
|
||||
|
||||
/** Draw N random elements from list. */
|
||||
public static <T> List<T> RandomSubset(List<T> list, int N)
|
||||
{
|
||||
if (list.size() <= N) { return list; }
|
||||
public static <T> List<T> RandomSubset(List<T> list, int N) {
|
||||
if (list.size() <= N) {
|
||||
return list;
|
||||
}
|
||||
|
||||
java.util.Random random = new java.util.Random();
|
||||
|
||||
int idx[] = new int[list.size()];
|
||||
for (int i = 0; i < list.size(); i++) { idx[i] = random.nextInt(); }
|
||||
for (int i = 0; i < list.size(); i++) {
|
||||
idx[i] = random.nextInt();
|
||||
}
|
||||
|
||||
Integer[] perm = SortPermutation(idx);
|
||||
Integer[] perm = SortPermutation(idx);
|
||||
|
||||
List<T> ans = new ArrayList<T>();
|
||||
for (int i = 0; i < N; i++) { ans.add(list.get(perm[i])); }
|
||||
for (int i = 0; i < N; i++) {
|
||||
ans.add(list.get(perm[i]));
|
||||
}
|
||||
|
||||
return ans;
|
||||
}
|
||||
|
||||
// lifted from the internet
|
||||
// http://www.cs.princeton.edu/introcs/91float/Gamma.java.html
|
||||
public static double logGamma(double x)
|
||||
{
|
||||
double tmp = (x - 0.5) * Math.log(x + 4.5) - (x + 4.5);
|
||||
double ser = 1.0 + 76.18009173 / (x + 0) - 86.50532033 / (x + 1)
|
||||
+ 24.01409822 / (x + 2) - 1.231739516 / (x + 3)
|
||||
+ 0.00120858003 / (x + 4) - 0.00000536382 / (x + 5);
|
||||
return tmp + Math.log(ser * Math.sqrt(2 * Math.PI));
|
||||
public static double logGamma(double x) {
|
||||
double tmp = (x - 0.5) * Math.log(x + 4.5) - (x + 4.5);
|
||||
double ser = 1.0 + 76.18009173 / (x + 0) - 86.50532033 / (x + 1)
|
||||
+ 24.01409822 / (x + 2) - 1.231739516 / (x + 3)
|
||||
+ 0.00120858003 / (x + 4) - 0.00000536382 / (x + 5);
|
||||
return tmp + Math.log(ser * Math.sqrt(2 * Math.PI));
|
||||
}
|
||||
|
||||
public static double percentage(double x, double base) { return (base> 0 ? (x/base)*100.0 : 0); }
|
||||
public static double percentage(int x, int base) { return (base> 0 ? ((double)x/(double)base)*100.0 : 0); }
|
||||
public static double percentage(long x, long base) { return (base> 0 ? ((double)x/(double)base)*100.0 : 0); }
|
||||
public static double percentage(double x, double base) {
|
||||
return (base > 0 ? (x / base) * 100.0 : 0);
|
||||
}
|
||||
|
||||
public static String dupString( char c, int nCopies ) {
|
||||
public static double percentage(int x, int base) {
|
||||
return (base > 0 ? ((double) x / (double) base) * 100.0 : 0);
|
||||
}
|
||||
|
||||
public static double percentage(long x, long base) {
|
||||
return (base > 0 ? ((double) x / (double) base) * 100.0 : 0);
|
||||
}
|
||||
|
||||
public static String dupString(char c, int nCopies) {
|
||||
char[] chars = new char[nCopies];
|
||||
Arrays.fill(chars,c);
|
||||
Arrays.fill(chars, c);
|
||||
return new String(chars);
|
||||
}
|
||||
|
||||
public static int countOccurrences(char c, String s) {
|
||||
int count = 0;
|
||||
for ( int i = 0; i < s.length(); i++ ) {
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
count += s.charAt(i) == c ? 1 : 0;
|
||||
}
|
||||
return count;
|
||||
|
|
@ -460,17 +469,17 @@ public class Utils {
|
|||
|
||||
public static <T> int countOccurrences(T x, List<T> l) {
|
||||
int count = 0;
|
||||
for ( T y : l ) {
|
||||
if ( x.equals(y) ) count++;
|
||||
for (T y : l) {
|
||||
if (x.equals(y)) count++;
|
||||
}
|
||||
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
public static byte listMaxByte(List<Byte> quals) {
|
||||
if ( quals.size() == 0 ) return 0;
|
||||
if (quals.size() == 0) return 0;
|
||||
byte m = quals.get(0);
|
||||
for ( byte b : quals ) {
|
||||
for (byte b : quals) {
|
||||
m = b > m ? b : m;
|
||||
}
|
||||
return m;
|
||||
|
|
@ -479,67 +488,84 @@ public class Utils {
|
|||
|
||||
/** Returns indices of all occurrences of the specified symbol in the string */
|
||||
public static int[] indexOfAll(String s, int ch) {
|
||||
int[] pos = new int[64];
|
||||
int z = 0;
|
||||
|
||||
for ( int i = 0 ; i < s.length() ; i++ ) {
|
||||
if ( s.charAt(i) == ch ) pos[z++] = i;
|
||||
}
|
||||
return reallocate(pos,z);
|
||||
int[] pos = new int[64];
|
||||
int z = 0;
|
||||
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
if (s.charAt(i) == ch) pos[z++] = i;
|
||||
}
|
||||
return reallocate(pos, z);
|
||||
}
|
||||
|
||||
/** Returns new (reallocated) integer array of the specified size, with content
|
||||
|
||||
/**
|
||||
* Returns new (reallocated) integer array of the specified size, with content
|
||||
* of the original array <code>orig</code> copied into it. If <code>newSize</code> is
|
||||
* less than the size of the original array, only first <code>newSize</code> elements will be copied.
|
||||
* If new size is greater than the size of the original array, the content of the original array will be padded
|
||||
* with zeros up to the new size. Finally, if new size is the same as original size, no memory reallocation
|
||||
* will be performed and the original array will be returned instead.
|
||||
* with zeros up to the new size. Finally, if new size is the same as original size, no memory reallocation
|
||||
* will be performed and the original array will be returned instead.
|
||||
*
|
||||
* @param orig
|
||||
* @param newSize
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static int[] reallocate(int[] orig, int newSize) {
|
||||
if ( orig.length == newSize ) return orig;
|
||||
int[] new_array = new int[newSize];
|
||||
int L = ( newSize > orig.length ? orig.length : newSize );
|
||||
for ( int i = 0 ; i < L ; i++ ) new_array[i] = orig[i];
|
||||
return new_array;
|
||||
if (orig.length == newSize) return orig;
|
||||
int[] new_array = new int[newSize];
|
||||
int L = (newSize > orig.length ? orig.length : newSize);
|
||||
for (int i = 0; i < L; i++) new_array[i] = orig[i];
|
||||
return new_array;
|
||||
}
|
||||
|
||||
|
||||
/* TEST ME
|
||||
public static void main(String[] argv) {
|
||||
List<Integer> l1 = new LinkedList<Integer>();
|
||||
List<Integer> l2 = new ArrayList<Integer>();
|
||||
public static void main(String[] argv) {
|
||||
List<Integer> l1 = new LinkedList<Integer>();
|
||||
List<Integer> l2 = new ArrayList<Integer>();
|
||||
|
||||
l1.add(1);
|
||||
l1.add(5);
|
||||
l1.add(3);
|
||||
l1.add(10);
|
||||
l1.add(4);
|
||||
l1.add(2);
|
||||
l2.add(1);
|
||||
l2.add(5);
|
||||
l2.add(3);
|
||||
l2.add(10);
|
||||
l2.add(4);
|
||||
l2.add(2);
|
||||
l1.add(1);
|
||||
l1.add(5);
|
||||
l1.add(3);
|
||||
l1.add(10);
|
||||
l1.add(4);
|
||||
l1.add(2);
|
||||
l2.add(1);
|
||||
l2.add(5);
|
||||
l2.add(3);
|
||||
l2.add(10);
|
||||
l2.add(4);
|
||||
l2.add(2);
|
||||
|
||||
Predicate<Integer> p = new Predicate<Integer>() {
|
||||
public boolean apply(Integer i) {
|
||||
return i > 2;
|
||||
}
|
||||
};
|
||||
filterInPlace(p, l1);
|
||||
filterInPlace(p, l2);
|
||||
Predicate<Integer> p = new Predicate<Integer>() {
|
||||
public boolean apply(Integer i) {
|
||||
return i > 2;
|
||||
}
|
||||
};
|
||||
filterInPlace(p, l1);
|
||||
filterInPlace(p, l2);
|
||||
|
||||
for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i));
|
||||
System.out.println();
|
||||
for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i));
|
||||
System.out.println();
|
||||
for ( int i = 0 ; i < l1.size(); i++ ) System.out.print(" "+l1.get(i));
|
||||
System.out.println();
|
||||
for ( int i = 0 ; i < l2.size(); i++ ) System.out.print(" " + l2.get(i));
|
||||
System.out.println();
|
||||
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
/**
|
||||
* a helper method. Turns a single character string into a char.
|
||||
*
|
||||
* @param str the string
|
||||
*
|
||||
* @return a char
|
||||
*/
|
||||
public static char stringToChar(String str) {
|
||||
if (str.length() != 1) throw new IllegalArgumentException("String length must be one");
|
||||
return str.charAt(0);
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -153,4 +155,17 @@ public class BasicGenotype implements Genotype {
|
|||
if (!isVariant(this.mRef)) throw new IllegalStateException("this genotype is not a variant");
|
||||
return new BasicVariation(this.getBases(), String.valueOf(mRef), this.getBases().length(), mLocation, mNegLog10PError);
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn a list of alleles into a genotype
|
||||
* @param alleles the list of alleles
|
||||
* @return a string representation of this list
|
||||
*/
|
||||
public static String alleleListToString(List<String> alleles) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (String allele : alleles)
|
||||
builder.append(allele);
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
|
|
@ -40,6 +42,7 @@ public class BasicVariation implements Variation {
|
|||
public BasicVariation(String bases, String reference, int length, GenomeLoc location, double confidence) {
|
||||
mBases = bases;
|
||||
mRef = reference;
|
||||
if (mRef.length() != 1) throw new StingException("The reference must be a single base");
|
||||
mLength = length;
|
||||
mLocation = location;
|
||||
mConfidence = confidence;
|
||||
|
|
@ -82,23 +85,6 @@ public class BasicVariation implements Variation {
|
|||
return (mLength < 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
return mBases;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBaseList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBases());
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLocation;
|
||||
|
|
@ -112,7 +98,7 @@ public class BasicVariation implements Variation {
|
|||
/** are we bi-allelic? */
|
||||
@Override
|
||||
public boolean isBiallelic() {
|
||||
return true;
|
||||
return (getAlternateAlleleList().size() == 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -120,6 +106,40 @@ public class BasicVariation implements Variation {
|
|||
return mConfidence;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
for (char c : this.mBases.toCharArray())
|
||||
if (c != Utils.stringToChar(mRef))
|
||||
list.add(String.valueOf(c));
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlleleList() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
if (this.mBases.contains(mRef)) list.add(mRef);
|
||||
for (char c : this.mBases.toCharArray())
|
||||
if (c != Utils.stringToChar(mRef))
|
||||
list.add(String.valueOf(c));
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReference() {
|
||||
if (mLength != 0) return false;
|
||||
|
|
@ -149,11 +169,8 @@ public class BasicVariation implements Variation {
|
|||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
|
||||
// we know that if we're a snp, the reference is a single base, so charAt(0) is safe
|
||||
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBases().charAt(1);
|
||||
return getAlternateBases().charAt(0);
|
||||
if (!this.isBiallelic() || this.getAlternateAlleleList().size() != 1) throw new IllegalStateException("we're not biallelic");
|
||||
return Utils.stringToChar(this.getAlternateAlleleList().get(0));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -164,11 +181,8 @@ public class BasicVariation implements Variation {
|
|||
@Override
|
||||
public char getReferenceForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
|
||||
// we know that if we're a snp, the reference is a single base, so charAt(0) is safe
|
||||
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBases().charAt(0);
|
||||
return getAlternateBases().charAt(1);
|
||||
if (!this.isBiallelic()) throw new IllegalStateException("we're not biallelic");
|
||||
return Utils.stringToChar(this.mRef);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -14,14 +14,21 @@ import java.util.List;
|
|||
public interface Variation {
|
||||
// the types of variants we currently allow
|
||||
public enum VARIANT_TYPE {
|
||||
SNP, INDEL, REFERENCE // though reference is not really a variant
|
||||
SNP, INDEL, REFERENCE // though reference is not really a variant, we need to represent it
|
||||
}
|
||||
|
||||
/** are we bi-allelic? */
|
||||
public boolean isBiallelic();
|
||||
|
||||
/**
|
||||
* get the frequency of this variant, if we're a variant. If we're reference this method
|
||||
* should return 0.
|
||||
* should return 0. If we can't provide an alternate allele frequency, this should also
|
||||
* return 0.
|
||||
*
|
||||
* @return double with the stored frequency
|
||||
* WARNING: This method is only valid for biAllelic data, the contract is to check isBiallelic()
|
||||
* before calling this method
|
||||
*
|
||||
* @return double the minor allele frequency
|
||||
*/
|
||||
public double getNonRefAlleleFrequency();
|
||||
|
||||
|
|
@ -32,7 +39,8 @@ public interface Variation {
|
|||
public VARIANT_TYPE getType();
|
||||
|
||||
/**
|
||||
* are we a SNP? If not we're a Indel/deletion or the reference
|
||||
* are we a SNP? If not we're a Indel/deletion or the reference. This method must be call before you use
|
||||
* the convenience methods getAlternativeBaseForSNP or getReferenceForSNP, to ensure that you're working with a SNP
|
||||
*
|
||||
* @return true if we're a SNP
|
||||
*/
|
||||
|
|
@ -60,22 +68,26 @@ public interface Variation {
|
|||
public boolean isReference();
|
||||
|
||||
/**
|
||||
* get the location that this Variant represents
|
||||
* are we an insertion or a deletion? yes, then return true. No? false.
|
||||
*
|
||||
* @return true if we're an insertion or deletion
|
||||
*/
|
||||
public boolean isIndel();
|
||||
|
||||
/**
|
||||
* get the location of this Variant
|
||||
*
|
||||
* @return a GenomeLoc
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
* get the reference base(s) for this Variant
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
public String getReference();
|
||||
|
||||
/** are we bi-allelic? */
|
||||
public boolean isBiallelic();
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
|
|
@ -83,26 +95,26 @@ public interface Variation {
|
|||
*/
|
||||
public double getNegLog10PError();
|
||||
|
||||
/**
|
||||
* gets the alternate base. Use this method if we're biallelic
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getAlternateBases();
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if the allele count is greater then 2 (not biallelic)
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlternateBaseList();
|
||||
public List<String> getAlternateAlleleList();
|
||||
|
||||
/**
|
||||
* are we an insertion or a deletion? yes, then return true. No? false.
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles. If the reference base is not an allele in this varation
|
||||
* it will not be in the list (i.e. there is no guarantee that the reference base is in the list).
|
||||
*
|
||||
* @return true if we're an insertion or deletion
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public boolean isIndel();
|
||||
public List<String> getAlleleList();
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
|
|
|
|||
|
|
@ -56,10 +56,10 @@ public class rodDbSNPTest extends BaseTest {
|
|||
rodDbSNP var = (rodDbSNP)rod;
|
||||
if (rod.isSNP()) {
|
||||
// quick check, if we're not triallelic, make sure the ref is right
|
||||
if (var.getRefSnpFWD() == var.refBases.charAt(0) || var.getAltSnpFWD() == var.refBases.charAt(0))
|
||||
if (var.getReferenceForSNP() == var.refBases.charAt(0) || var.getAlternativeBaseForSNP() == var.refBases.charAt(0))
|
||||
// also make sure the ref is a single character
|
||||
if (var.refBases.length() == 1)
|
||||
Assert.assertTrue(var.refBases.charAt(0)==var.getRefSnpFWD());
|
||||
Assert.assertTrue(var.refBases.charAt(0)==var.getReferenceForSNP());
|
||||
if (var.getLocation().getContig().equals("1") &&
|
||||
var.getLocation().getStart() >= 10000000 &&
|
||||
var.getLocation().getStart() <= 11000000) {
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ public class VariantEvalWalkerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testEvalVariantRODOutputViolations() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("ad2ca71dfa7e45f369380178c4f8e69f");
|
||||
md5.add("d84e5b2a23ab1cf028145f09cd1e9f5b");
|
||||
|
||||
/**
|
||||
* the above MD5 was calculated from running the following command:
|
||||
|
|
|
|||
Loading…
Reference in New Issue