Adding the VCF ROD. Also changed the VCF objects to much more user friendly.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1658 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d9ee515a9b
commit
7b39aa4966
|
|
@ -15,6 +15,8 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -123,6 +125,12 @@ public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF
|
|||
return mRecord.getRefBase().toString();
|
||||
}
|
||||
|
||||
/** are we bi-allelic? */
|
||||
@Override
|
||||
public boolean isBiallelic() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if all observed alleles are reference alleles. All is<Variant> methods (where Variant=SNP,Insertion, etc) should
|
||||
* return false at such site to ensure consistency. This method is included for use with genotyping calls (isGenotype()==true), it makes
|
||||
|
|
@ -154,9 +162,9 @@ public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF
|
|||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBases().charAt(1);
|
||||
return getAlternateBases().charAt(0);
|
||||
if (getAlternateBase().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBase().charAt(1);
|
||||
return getAlternateBase().charAt(0);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -168,9 +176,9 @@ public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF
|
|||
@Override
|
||||
public char getReferenceForSNP() {
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBases().charAt(0);
|
||||
return getAlternateBases().charAt(1);
|
||||
if (getAlternateBase().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBase().charAt(0);
|
||||
return getAlternateBase().charAt(1);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -252,10 +260,22 @@ public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF
|
|||
* @return a string, of ploidy
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
public String getAlternateBase() {
|
||||
return this.getBestGenotype(0).toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBases() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBase());
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns minor allele frequency.
|
||||
*
|
||||
|
|
@ -270,8 +290,7 @@ public class RodGLF implements ReferenceOrderedDatum, Variation, Iterator<RodGLF
|
|||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
if (this.isSNP()) return VARIANT_TYPE.SNP;
|
||||
else if (this.isInsertion()) return VARIANT_TYPE.INDEL;
|
||||
else if (this.isDeletion()) return VARIANT_TYPE.DELETION;
|
||||
else if (this.isInsertion() || this.isDeletion()) return VARIANT_TYPE.INDEL;
|
||||
else return VARIANT_TYPE.REFERENCE;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,20 +25,21 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class RodGeliText extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, AllelicVariant {
|
||||
public enum Genotype {
|
||||
public enum Genotype_Strings {
|
||||
AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
|
||||
}
|
||||
|
||||
|
|
@ -187,10 +188,22 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
* @return a string, of ploidy
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
public String getAlternateBase() {
|
||||
return this.bestGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBases() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBase());
|
||||
return list;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -306,7 +319,7 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
if (genotypeLikelihoods[likelihoodIndex] > bestLikelihood) {
|
||||
bestLikelihood = genotypeLikelihoods[likelihoodIndex];
|
||||
|
||||
bestGenotype = Genotype.values()[likelihoodIndex].toString();
|
||||
bestGenotype = Genotype_Strings.values()[likelihoodIndex].toString();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -317,8 +330,8 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
}
|
||||
|
||||
for (int likelihoodIndex = 0; likelihoodIndex < likelihoods.length; likelihoodIndex++) {
|
||||
if (refBase == Genotype.values()[likelihoodIndex].toString().charAt(0) &&
|
||||
refBase == Genotype.values()[likelihoodIndex].toString().charAt(1)) {
|
||||
if (refBase == Genotype_Strings.values()[likelihoodIndex].toString().charAt(0) &&
|
||||
refBase == Genotype_Strings.values()[likelihoodIndex].toString().charAt(1)) {
|
||||
refLikelihood = genotypeLikelihoods[likelihoodIndex];
|
||||
}
|
||||
}
|
||||
|
|
@ -328,19 +341,28 @@ public class RodGeliText extends BasicReferenceOrderedDatum implements Variation
|
|||
this.lodBtnb = (bestLikelihood - nextBestLikelihood);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
|
||||
// figure out the best -switch to this
|
||||
/*Integer values[] = Utils.SortPermutation(this.genotypeLikelihoods);
|
||||
int indexThis = x.ordinal();
|
||||
int other = (x.toString().equals(Genotype.values()[values[1]])) ? values[9] : values[8];
|
||||
double lod = (genotypeLikelihoods[indexThis] - genotypeLikelihoods[other]);*/
|
||||
return new BasicGenotype(getLocation(),x.toString(), refBase, lodBtnb);
|
||||
public Genotype getGenotype(DiploidGenotype x) {
|
||||
if (x.toString() != this.getAltBasesFWD()) throw new IllegalStateException("We don't contain that genotype!");
|
||||
return new BasicGenotype(getLocation(), x.toString(), refBase, lodBtnb);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
List<Genotype> ret = new ArrayList<Genotype>();
|
||||
ret.add(new BasicGenotype(getLocation(), this.getAltBasesFWD(), refBase, lodBtnb));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -187,10 +187,22 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
public String getAlternateBase() {
|
||||
return this.feature;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBases() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBase());
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
|
|
@ -247,6 +259,18 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
public boolean isBiallelic() { return true; }
|
||||
public int length() { return 1; }
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
List<Genotype> ret = new ArrayList<Genotype>();
|
||||
ret.add(new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence()));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
|
|
@ -257,7 +281,7 @@ public class RodGenotypeChipAsGFF extends BasicReferenceOrderedDatum implements
|
|||
if (!x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
|
||||
return new BasicGenotype(this.getLocation(),this.feature,this.getRefSnpFWD(),this.getConsensusConfidence());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* do we have the specified genotype? not all backedByGenotypes
|
||||
* have all the genotype data.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,329 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class RodVCF
|
||||
* <p/>
|
||||
* An implementation of the ROD for VCF.
|
||||
*/
|
||||
public class RodVCF extends BasicReferenceOrderedDatum implements Variation, VariantBackedByGenotype, Iterator<RodVCF> {
|
||||
// our VCF related information
|
||||
private VCFReader mReader;
|
||||
public VCFRecord mCurrentRecord;
|
||||
|
||||
public RodVCF(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public void assertNotNull() {
|
||||
if (mCurrentRecord == null) {
|
||||
throw new UnsupportedOperationException("The current Record is null");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean parseLine(Object header, String[] parts) throws IOException {
|
||||
throw new UnsupportedOperationException("We don't support the parse line");
|
||||
}
|
||||
|
||||
public Object initialize(final File source) throws FileNotFoundException {
|
||||
if (mReader == null) {
|
||||
mReader = new VCFReader(source);
|
||||
}
|
||||
return mReader.getHeader();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if (this.mCurrentRecord != null)
|
||||
return this.mCurrentRecord.toString();
|
||||
else
|
||||
return "";
|
||||
}
|
||||
|
||||
public Iterator<RodVCF> createIterator(String name, File file) {
|
||||
RodVCF vcf = new RodVCF(name);
|
||||
try {
|
||||
vcf.initialize(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to find file " + file);
|
||||
}
|
||||
return vcf;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
@Override
|
||||
public double getNonRefAlleleFrequency() {
|
||||
assertNotNull();
|
||||
if (this.mCurrentRecord.getInfoValues().containsKey("AF")) {
|
||||
return Double.valueOf(this.mCurrentRecord.getInfoValues().get("AF"));
|
||||
} else {
|
||||
// this is the poor man's AF
|
||||
if (this.mCurrentRecord.getInfoValues().containsKey("AC") && this.mCurrentRecord.getInfoValues().containsKey("AN")) {
|
||||
String splt[] = mCurrentRecord.getInfoValues().get("AC").split(",");
|
||||
if (splt.length > 0) {
|
||||
return (Double.valueOf(splt[0]) / Double.valueOf(mCurrentRecord.getInfoValues().get("AN")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
/** @return the VARIANT_TYPE of the current variant */
|
||||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
if (this.isSNP()) return VARIANT_TYPE.SNP;
|
||||
else if (this.isIndel()) return VARIANT_TYPE.INDEL;
|
||||
return VARIANT_TYPE.REFERENCE;
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a SNP? If not we're a Indel/deletion
|
||||
*
|
||||
* @return true if we're a SNP
|
||||
*/
|
||||
@Override
|
||||
public boolean isSNP() {
|
||||
this.assertNotNull();
|
||||
if (!mCurrentRecord.hasAlternateAllele())
|
||||
return false;
|
||||
for (String alt : this.mCurrentRecord.getAlternateAlleles()) {
|
||||
if (alt.length() != 1)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion?
|
||||
*
|
||||
* @return true if we are, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isInsertion() {
|
||||
this.assertNotNull();
|
||||
if (!mCurrentRecord.hasAlternateAllele())
|
||||
return false;
|
||||
for (String alt : this.mCurrentRecord.getAlternateAlleles()) {
|
||||
if (alt.startsWith("I"))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion?
|
||||
*
|
||||
* @return true if we are, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isDeletion() {
|
||||
this.assertNotNull();
|
||||
if (!mCurrentRecord.hasAlternateAllele())
|
||||
return false;
|
||||
for (String alt : this.mCurrentRecord.getAlternateAlleles()) {
|
||||
if (alt.startsWith("D"))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
this.assertNotNull();
|
||||
return GenomeLocParser.createGenomeLoc(mCurrentRecord.getChromosome(), mCurrentRecord.getPosition());
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
@Override
|
||||
public String getReference() {
|
||||
return String.valueOf(mCurrentRecord.getReferenceBase());
|
||||
}
|
||||
|
||||
/** are we bi-allelic? */
|
||||
@Override
|
||||
public boolean isBiallelic() {
|
||||
return (this.getAlternateBases().size() == 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
@Override
|
||||
public double getNegLog10PError() {
|
||||
// we're -10 log(error), we have to divide by 10
|
||||
return mCurrentRecord.getQual() / 10.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* are we truely a variant, given a reference
|
||||
*
|
||||
* @return false if we're a variant(indel, delete, SNP, etc), true if we're not
|
||||
*/
|
||||
@Override
|
||||
public boolean isReference() {
|
||||
return (!mCurrentRecord.hasAlternateAllele());
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBase() {
|
||||
if (!this.isBiallelic()) throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBases instead");
|
||||
return this.mCurrentRecord.getAlternateAlleles().get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBases() {
|
||||
return this.mCurrentRecord.getAlternateAlleles();
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion or a deletion? yes, then return true. No? Well, false then.
|
||||
*
|
||||
* @return true if we're an insertion or deletion
|
||||
*/
|
||||
@Override
|
||||
public boolean isIndel() {
|
||||
return (!isSNP());
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if (!isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
return mCurrentRecord.getAlternateAlleles().get(0).charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
@Override
|
||||
public char getReferenceForSNP() {
|
||||
if (!isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
return mCurrentRecord.getReferenceBase();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes
|
||||
*
|
||||
* @return a list of the genotypes
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
if (!this.mCurrentRecord.hasGenotypeData()) {
|
||||
return genotypes;
|
||||
}
|
||||
double refQual = (this.getNegLog10PError());
|
||||
// add the reference
|
||||
for (VCFGenotypeRecord rec : mCurrentRecord.getVCFGenotypeRecords()) {
|
||||
double qual = 0;
|
||||
if (rec.getAlleles().equals(this.getReference()))
|
||||
qual = refQual;
|
||||
else if (rec.getFields().containsKey("GQ"))
|
||||
qual = Double.valueOf(rec.getFields().get("GQ")) / 10.0;
|
||||
genotypes.add(new BasicGenotype(this.getLocation(), Utils.join("", rec.getAlleles()), this.getReference().charAt(0), qual));
|
||||
}
|
||||
return genotypes;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public Genotype getGenotype(DiploidGenotype x) {
|
||||
if (x.toString().equals(getReference()))
|
||||
return new BasicGenotype(this.getLocation(), getReference(), this.getReference().charAt(0), 0);
|
||||
for (VCFGenotypeRecord record : mCurrentRecord.getVCFGenotypeRecords()) {
|
||||
if (Utils.join("", record.getAlleles()).equals(x.toString())) {
|
||||
double qual = 0.0;
|
||||
if (record.getAlleles().equals(this.getReference()))
|
||||
qual = this.getNegLog10PError();
|
||||
else if (record.getFields().containsKey("GQ"))
|
||||
qual = Double.valueOf(record.getFields().get("GQ")) / 10.0;
|
||||
return new BasicGenotype(this.getLocation(), Utils.join("", record.getAlleles()), this.getReference().charAt(0), qual);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* do we have the specified genotype? not all backedByGenotypes
|
||||
* have all the genotype data.
|
||||
*
|
||||
* @param x the genotype
|
||||
*
|
||||
* @return true if available, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean hasGenotype(DiploidGenotype x) {
|
||||
if (getGenotype(x) != null)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return (mReader.hasNext());
|
||||
}
|
||||
|
||||
@Override
|
||||
public RodVCF next() {
|
||||
mCurrentRecord = mReader.next();
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("You cannot remove from a VCF rod");
|
||||
}
|
||||
}
|
||||
|
|
@ -2,17 +2,20 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import net.sf.picard.util.SequenceUtil;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Example format:
|
||||
* 585 chr1 433 433 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1
|
||||
* 585 chr1 491 492 rs55998931 0 + C C C/T genomic single unknown 0 0 unknown exact 1
|
||||
*
|
||||
* 585 chr1 433 433 rs56289060 0 + - - -/C genomic insertion unknown 0 0 unknown between 1
|
||||
* 585 chr1 491 492 rs55998931 0 + C C C/T genomic single unknown 0 0 unknown exact 1
|
||||
* <p/>
|
||||
* User: mdepristo
|
||||
* Date: Feb 27, 2009
|
||||
* Time: 10:47:14 AM
|
||||
|
|
@ -60,7 +63,9 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
// manipulating the SNP information
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
public GenomeLoc getLocation() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
|
|
@ -86,7 +91,8 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
return strand.equals("+");
|
||||
}
|
||||
|
||||
/** Returns bases in the reference allele as a String. String can be empty (as in insertion into
|
||||
/**
|
||||
* Returns bases in the reference allele as a String. String can be empty (as in insertion into
|
||||
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
|
||||
* (for longer indels).
|
||||
*
|
||||
|
|
@ -109,18 +115,18 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
*/
|
||||
public char getRefSnpFWD() throws IllegalStateException {
|
||||
//System.out.printf("refbases is %s but %s%n", refBases, toString());
|
||||
if ( isIndel() ) throw new IllegalStateException("Variant is not a SNP");
|
||||
if (isIndel()) throw new IllegalStateException("Variant is not a SNP");
|
||||
// fix - at least this way we ensure that we'll get the other base compared to getAltBasesFWD()
|
||||
List<String> alleles = getAllelesFWD();
|
||||
String val = (alleles.get(0).equals(refBases) ? alleles.get(0) : alleles.get(1));
|
||||
return val.charAt(0);
|
||||
// if ( onFwdStrand() ) return refBases.charAt(0);
|
||||
// else return SequenceUtil.reverseComplement(refBases).charAt(0);
|
||||
// else return SequenceUtil.reverseComplement(refBases).charAt(0);
|
||||
}
|
||||
|
||||
public List<String> getAllelesFWD() {
|
||||
List<String> alleles = null;
|
||||
if ( onFwdStrand() )
|
||||
if (onFwdStrand())
|
||||
alleles = Arrays.asList(observed.split("/"));
|
||||
else
|
||||
alleles = Arrays.asList(SequenceUtil.reverseComplement(observed).split("/"));
|
||||
|
|
@ -148,13 +154,22 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
public VARIANT_TYPE getType() {
|
||||
return VARIANT_TYPE.SNP;
|
||||
}// ----------------------------------------------------------------------
|
||||
|
||||
//
|
||||
// What kind of variant are we?
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public boolean isSNP() { return varType.contains("single"); }
|
||||
public boolean isInsertion() { return varType.contains("insertion"); }
|
||||
public boolean isDeletion() { return varType.contains("deletion"); }
|
||||
public boolean isSNP() {
|
||||
return varType.contains("single");
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return varType.contains("insertion");
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return varType.contains("deletion");
|
||||
}
|
||||
|
||||
/**
|
||||
* get the base representation of this Variant
|
||||
|
|
@ -162,11 +177,25 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
* @return a string, of ploidy
|
||||
*/
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
public String getAlternateBase() {
|
||||
return getAllelesFWDString();
|
||||
}
|
||||
|
||||
public boolean isIndel() { return isInsertion() || isDeletion() || varType.contains("in-del"); }
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBases() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBase());
|
||||
return list;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return isInsertion() || isDeletion() || varType.contains("in-del");
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException in the case
|
||||
|
|
@ -178,9 +207,9 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
public char getAlternativeBaseForSNP() {
|
||||
return getAltSnpFWD(); /*
|
||||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
if (getAlternateBases().charAt(0) == this.getReference())
|
||||
return getAlternateBases().charAt(1);
|
||||
return getAlternateBases().charAt(0); */
|
||||
if (getAlternateBase().charAt(0) == this.getReference())
|
||||
return getAlternateBase().charAt(1);
|
||||
return getAlternateBase().charAt(0); */
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -193,10 +222,17 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
public boolean isReference() { return false; } // snp locations are never "reference", there's always a variant
|
||||
public boolean isReference() {
|
||||
return false;
|
||||
} // snp locations are never "reference", there's always a variant
|
||||
|
||||
public boolean isHapmap() { return validationStatus.contains("by-hapmap"); }
|
||||
public boolean is2Hit2Allele() { return validationStatus.contains("by-2hit-2allele"); }
|
||||
public boolean isHapmap() {
|
||||
return validationStatus.contains("by-hapmap");
|
||||
}
|
||||
|
||||
public boolean is2Hit2Allele() {
|
||||
return validationStatus.contains("by-2hit-2allele");
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -205,9 +241,9 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
// ----------------------------------------------------------------------
|
||||
public String toString() {
|
||||
return String.format("%s\t%d\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
||||
getLocation().getContig(), getLocation().getStart(), getLocation().getStop()+1,
|
||||
name, strand, refBases, observed, molType,
|
||||
varType, validationStatus, avHet, avHetSE, func, locType, weight );
|
||||
getLocation().getContig(), getLocation().getStart(), getLocation().getStop() + 1,
|
||||
name, strand, refBases, observed, molType,
|
||||
varType, validationStatus, avHet, avHetSE, func, locType, weight);
|
||||
}
|
||||
|
||||
public String toSimpleString() {
|
||||
|
|
@ -216,18 +252,18 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
|
||||
public String toMediumString() {
|
||||
String s = String.format("%s:%s:%s", getLocation().toString(), name, getAllelesFWDString());
|
||||
if ( isSNP() ) s += ":SNP";
|
||||
if ( isIndel() ) s += ":Indel";
|
||||
if ( isHapmap() ) s += ":Hapmap";
|
||||
if ( is2Hit2Allele() ) s += ":2Hit";
|
||||
if (isSNP()) s += ":SNP";
|
||||
if (isIndel()) s += ":Indel";
|
||||
if (isHapmap()) s += ":Hapmap";
|
||||
if (is2Hit2Allele()) s += ":2Hit";
|
||||
return s;
|
||||
}
|
||||
|
||||
public String repl() {
|
||||
return String.format("%d\t%s\t%d\t%d\t%s\t0\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%f\t%f\t%s\t%s\t%d",
|
||||
585, getLocation().getContig(), getLocation().getStart()-1, getLocation().getStop(),
|
||||
name, strand, refBases, refBases, observed, molType,
|
||||
varType, validationStatus, avHet, avHetSE, func, locType, weight );
|
||||
585, getLocation().getContig(), getLocation().getStart() - 1, getLocation().getStop(),
|
||||
name, strand, refBases, refBases, observed, molType,
|
||||
varType, validationStatus, avHet, avHetSE, func, locType, weight);
|
||||
}
|
||||
|
||||
public boolean parseLine(final Object header, final String[] parts) {
|
||||
|
|
@ -235,12 +271,12 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
String contig = parts[1];
|
||||
long start = Long.parseLong(parts[2]) + 1; // The final is 0 based
|
||||
long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based
|
||||
loc = GenomeLocParser.parseGenomeLoc(contig, start, Math.max(start, stop-1));
|
||||
loc = GenomeLocParser.parseGenomeLoc(contig, start, Math.max(start, stop - 1));
|
||||
|
||||
name = parts[4];
|
||||
strand = parts[6];
|
||||
refBases = parts[7];
|
||||
if ( strand == "-" )
|
||||
if (strand == "-")
|
||||
refBases = BaseUtils.simpleReverseComplement(refBases);
|
||||
observed = parts[9];
|
||||
molType = parts[10];
|
||||
|
|
@ -253,69 +289,83 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
weight = Integer.parseInt(parts[17]);
|
||||
//System.out.printf("Parsed %s%n", toString());
|
||||
return true;
|
||||
} catch( MalformedGenomeLocException ex ) {
|
||||
} catch (MalformedGenomeLocException ex) {
|
||||
// Just rethrow malformed genome locs; the ROD system itself will deal with these.
|
||||
throw ex;
|
||||
} catch( ArrayIndexOutOfBoundsException ex ) {
|
||||
} catch (ArrayIndexOutOfBoundsException ex) {
|
||||
// Just rethrow malformed genome locs; the ROD system itself will deal with these.
|
||||
throw new RuntimeException("Badly formed dbSNP line: " + ex);
|
||||
} catch ( RuntimeException e ) {
|
||||
} catch (RuntimeException e) {
|
||||
System.out.printf(" Exception caught during parsing DBSNP line %s%n", Utils.join(" <=> ", parts));
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
public String getAltBasesFWD() {
|
||||
public String getAltBasesFWD() {
|
||||
List<String> alleles = getAllelesFWD();
|
||||
return (alleles.get(0).equals(refBases) ? alleles.get(1) : alleles.get(0));
|
||||
}
|
||||
}
|
||||
|
||||
public char getAltSnpFWD() throws IllegalStateException {
|
||||
if ( !isSNP() )
|
||||
public char getAltSnpFWD() throws IllegalStateException {
|
||||
if (!isSNP())
|
||||
throw new IllegalStateException("I'm not a SNP");
|
||||
return getAltBasesFWD().charAt(0);
|
||||
}
|
||||
}
|
||||
|
||||
public double getConsensusConfidence() {
|
||||
// TODO Auto-generated method stub
|
||||
return Double.MAX_VALUE;
|
||||
}
|
||||
public double getConsensusConfidence() {
|
||||
// TODO Auto-generated method stub
|
||||
return Double.MAX_VALUE;
|
||||
}
|
||||
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
return Arrays.asList(Utils.join("", getAllelesFWD()));
|
||||
}
|
||||
public List<String> getGenotype() throws IllegalStateException {
|
||||
return Arrays.asList(Utils.join("", getAllelesFWD()));
|
||||
}
|
||||
|
||||
public double getMAF() {
|
||||
// Fixme: update to actually get MAF
|
||||
//return avHet;
|
||||
public double getMAF() {
|
||||
// Fixme: update to actually get MAF
|
||||
//return avHet;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
public double getHeterozygosity() {
|
||||
return avHet;
|
||||
}
|
||||
|
||||
public int getPloidy() throws IllegalStateException {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
}
|
||||
public int getPloidy() throws IllegalStateException {
|
||||
// TODO Auto-generated method stub
|
||||
return 0;
|
||||
}
|
||||
|
||||
public double getVariationConfidence() {
|
||||
// TODO Auto-generated method stub
|
||||
return Double.MAX_VALUE;
|
||||
}
|
||||
public double getVariationConfidence() {
|
||||
// TODO Auto-generated method stub
|
||||
return Double.MAX_VALUE;
|
||||
}
|
||||
|
||||
public boolean isGenotype() {
|
||||
// TODO Auto-generated method stub
|
||||
return false;
|
||||
}
|
||||
public boolean isGenotype() {
|
||||
// TODO Auto-generated method stub
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isBiallelic() {
|
||||
// TODO Auto-generated method stub
|
||||
return observed.indexOf('/')==observed.lastIndexOf('/');
|
||||
}
|
||||
public boolean isBiallelic() {
|
||||
// TODO Auto-generated method stub
|
||||
return observed.indexOf('/') == observed.lastIndexOf('/');
|
||||
}
|
||||
|
||||
public int length() { return (int)(loc.getStop() - loc.getStart() + 1); }
|
||||
public int length() {
|
||||
return (int) (loc.getStop() - loc.getStart() + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public List<org.broadinstitute.sting.utils.genotype.Genotype> getGenotypes() {
|
||||
List<org.broadinstitute.sting.utils.genotype.Genotype> list = new ArrayList<org.broadinstitute.sting.utils.genotype.Genotype>();
|
||||
list.add(new BasicGenotype(this.getLocation(), this.getAltBasesFWD(), this.getRefSnpFWD(), this.getConsensusConfidence()));
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
|
|
@ -325,7 +375,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
@Override
|
||||
public org.broadinstitute.sting.utils.genotype.Genotype getGenotype(DiploidGenotype x) {
|
||||
if (!x.toString().equals(this.getAltBasesFWD())) throw new IllegalStateException("Unable to retrieve genotype");
|
||||
return new BasicGenotype(this.getLocation(),this.getAltBasesFWD(),this.getRefSnpFWD(),this.getConsensusConfidence());
|
||||
return new BasicGenotype(this.getLocation(), this.getAltBasesFWD(), this.getRefSnpFWD(), this.getConsensusConfidence());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -338,6 +388,6 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements Variation, V
|
|||
*/
|
||||
@Override
|
||||
public boolean hasGenotype(DiploidGenotype x) {
|
||||
return (!x.toString().equals(this.getAltBasesFWD())) ? false : true;
|
||||
return (!x.toString().equals(this.getAltBasesFWD())) ? false : true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -318,7 +318,7 @@ public class VariantFiltrationWalker extends LocusWalker<Integer, Integer> {
|
|||
if ( VariantsToVCF.generateVCFRecord(context.getTracker(), context.getReferenceContext(), context.getAlignmentContext(true), vcfHeader, gt, map, sampleNames, out, false, false) ) {
|
||||
if ( !filterFailureString.equals("") )
|
||||
map.put(VCFHeader.HEADER_FIELDS.FILTER, filterFailureString);
|
||||
vcfWriter.addRecord(new VCFRecord(vcfHeader, map, "GT:GQ:DP", gt));
|
||||
vcfWriter.addRecord(new VCFRecord(map, "GT:GQ:DP", gt));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,18 +58,18 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
int truthIndex, callIndex;
|
||||
if (chip == null)
|
||||
truthIndex = UNKNOWN;
|
||||
else if (chip.getAlternateBases().equals(g.toString()))
|
||||
else if (chip.getAlternateBase().equals(g.toString()))
|
||||
truthIndex = REF;
|
||||
else if (chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1))
|
||||
else if (chip.getAlternateBase().charAt(0) != chip.getAlternateBase().charAt(1))
|
||||
truthIndex = VAR_HET;
|
||||
else
|
||||
truthIndex = VAR_HOM;
|
||||
|
||||
if (eval == null)
|
||||
callIndex = NO_CALL;
|
||||
else if (eval.getAlternateBases().equals(g.toString()))
|
||||
else if (eval.getAlternateBase().equals(g.toString()))
|
||||
callIndex = REF;
|
||||
else if (eval.getAlternateBases().charAt(0) != eval.getAlternateBases().charAt(1))
|
||||
else if (eval.getAlternateBase().charAt(0) != eval.getAlternateBase().charAt(1))
|
||||
callIndex = VAR_HET;
|
||||
else
|
||||
callIndex = VAR_HOM;
|
||||
|
|
|
|||
|
|
@ -38,10 +38,10 @@ public class IndelMetricsAnalysis extends BasicVariantAnalysis implements Genoty
|
|||
else
|
||||
throw new RuntimeException("Variation is indel, but isn't insertion or deletion!");
|
||||
|
||||
if ( eval.getAlternateBases().length() < 100 ) {
|
||||
sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBases().length()]++;
|
||||
if ( eval.getAlternateBases().length() > maxSize )
|
||||
maxSize = eval.getAlternateBases().length();
|
||||
if ( eval.getAlternateBase().length() < 100 ) {
|
||||
sizes[eval.isDeletion() ? 0 : 1][eval.getAlternateBase().length()]++;
|
||||
if ( eval.getAlternateBase().length() > maxSize )
|
||||
maxSize = eval.getAlternateBase().length();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
|
@ -185,7 +183,7 @@ class PooledConcordanceTable {
|
|||
|
||||
public boolean pooledCallIsRef(Variation eval, char ref) {
|
||||
// code broken out for easy alteration when we start using pool-specific variations
|
||||
return eval.getAlternateBases().equalsIgnoreCase((Utils.dupString(ref,2)));
|
||||
return eval.getAlternateBase().equalsIgnoreCase((Utils.dupString(ref,2)));
|
||||
}
|
||||
|
||||
public int calculateNumFrequencyIndeces(int poolSize) {
|
||||
|
|
@ -209,7 +207,7 @@ class PooledConcordanceTable {
|
|||
for ( Variation eval : evals ) {
|
||||
if ( mismatchingCalls(firstEval, eval, ref) ) {
|
||||
// todo -- make this not a StingException but go to the log
|
||||
throw new StingException("Tri-Allelic Position "+eval.getAlternateBases()+"/"+firstEval.getAlternateBases() + " Ref: "+ ref + " not supported");
|
||||
throw new StingException("Tri-Allelic Position "+eval.getAlternateBase()+"/"+firstEval.getAlternateBase() + " Ref: "+ ref + " not supported");
|
||||
} else {
|
||||
alternateFrequency += calledVariantFrequency(eval,ref);
|
||||
}
|
||||
|
|
@ -233,10 +231,10 @@ class PooledConcordanceTable {
|
|||
|
||||
public boolean mismatchingCalls(Variation eval, Variation chip, char ref) {
|
||||
// eval and chip guaranteed to be non-null
|
||||
char chipF = chip.getAlternateBases().charAt(0);
|
||||
char chipS = chip.getAlternateBases().charAt(1);
|
||||
char evalF = chip.getAlternateBases().charAt(0);
|
||||
char evalS = chip.getAlternateBases().charAt(1);
|
||||
char chipF = chip.getAlternateBase().charAt(0);
|
||||
char chipS = chip.getAlternateBase().charAt(1);
|
||||
char evalF = chip.getAlternateBase().charAt(0);
|
||||
char evalS = chip.getAlternateBase().charAt(1);
|
||||
boolean mismatch;
|
||||
if (chipF == ref) {
|
||||
if ( chipS == ref ) {
|
||||
|
|
@ -261,7 +259,7 @@ class PooledConcordanceTable {
|
|||
|
||||
public double calledVariantFrequency( Variation var, char ref ) {
|
||||
// code broken out for easy alteration when we start using pool-specific variations
|
||||
String varStr = var.getAlternateBases();
|
||||
String varStr = var.getAlternateBase();
|
||||
double freq;
|
||||
if ( varStr.charAt(0) != ref && varStr.charAt(1) != ref ) {
|
||||
freq = (double) 2;
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ public class VariantCounter extends BasicVariantAnalysis implements GenotypeAnal
|
|||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
nSNPs += eval == null ? 0 : 1;
|
||||
|
||||
if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && ((VariantBackedByGenotype)eval).getGenotype( DiploidGenotype.valueOf(eval.getAlternateBases())).isHet() )
|
||||
if ( this.getMaster().evalContainsGenotypes && eval != null && eval.isSNP() && ((VariantBackedByGenotype)eval).getGenotype( DiploidGenotype.valueOf(eval.getAlternateBase())).isHet() )
|
||||
nHets++;
|
||||
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
|||
if (eval != null) {
|
||||
char alt = (eval.isSNP()) ? eval.getAlternativeBaseForSNP() : eval.getReference().charAt(0);
|
||||
if (dbSNP != null && dbSNP.isSNP())
|
||||
return !dbSNP.getAlternateBases().contains(String.valueOf(alt));
|
||||
return !dbSNP.getAlternateBase().contains(String.valueOf(alt));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
@ -117,7 +117,7 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
|
|||
if (dbsnp.isSNP() && eval.isSNP() && discordantP(dbsnp, eval)) {
|
||||
return String.format("Discordant [DBSNP %s] [EVAL %s]", dbsnp, eval);
|
||||
} else if (dbsnp.isIndel() && eval.isSNP()) {
|
||||
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBases(), eval);
|
||||
return String.format("SNP-at-indel DBSNP=%s %s", dbsnp.getAlternateBase(), eval);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
if ( generateVCFRecord(tracker, ref, context, vcfheader, gt, map, sampleNames, out, SUPPRESS_MULTISTATE, VERBOSE) ) {
|
||||
vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT:GQ:DP", gt));
|
||||
vcfwriter.addRecord(new VCFRecord(map, "GT:GQ:DP", gt));
|
||||
//vcfwriter.addRecord(new VCFRecord(vcfheader, map, "GT", gt));
|
||||
return 1;
|
||||
}
|
||||
|
|
@ -123,7 +123,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
int allele2 = BaseUtils.simpleBaseToBaseIndex(alleles.get(0).charAt(1));
|
||||
if (allele2 >= 0 && allele2 != refbase) { alleleCounts[allele2]++; }
|
||||
|
||||
gt.add(new VCFGenotypeRecord(name, str, alleles, VCFGenotypeRecord.PHASE.UNPHASED, ref.getBase()));
|
||||
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str ));
|
||||
|
||||
numSNPs++;
|
||||
snpQual += av.getVariationConfidence();
|
||||
|
|
@ -134,7 +134,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
List<String> alleles = new ArrayList<String>();
|
||||
alleles.add(ref.getBase() + "" + ref.getBase());
|
||||
|
||||
gt.add(new VCFGenotypeRecord(name, str, alleles, VCFGenotypeRecord.PHASE.UNPHASED, ref.getBase()));
|
||||
gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str ));
|
||||
|
||||
numRefs++;
|
||||
}
|
||||
|
|
@ -184,7 +184,7 @@ public class VariantsToVCF extends RefWalker<Integer, Integer> {
|
|||
} else if (field == VCFHeader.HEADER_FIELDS.QUAL) {
|
||||
map.put(field, String.format("%d", snpQual > 99 ? 99 : (int) snpQual));
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.FILTER) {
|
||||
map.put(field, String.valueOf("0"));
|
||||
map.put(field, "0");
|
||||
} else if (field == VCFHeader.HEADER_FIELDS.INFO) {
|
||||
String infostr = ".";
|
||||
ArrayList<String> info = new ArrayList<String>();
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
* Date: Sep 9, 2009
|
||||
|
|
@ -59,8 +62,7 @@ public class BasicVariation implements Variation {
|
|||
*/
|
||||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
if (mLength > 0) return VARIANT_TYPE.INDEL;
|
||||
if (mLength < 0) return VARIANT_TYPE.DELETION;
|
||||
if (mLength != 0) return VARIANT_TYPE.INDEL;
|
||||
return (isSNP()) ? VARIANT_TYPE.SNP : VARIANT_TYPE.REFERENCE;
|
||||
}
|
||||
|
||||
|
|
@ -81,10 +83,22 @@ public class BasicVariation implements Variation {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String getAlternateBases() {
|
||||
public String getAlternateBase() {
|
||||
return mBases;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateBases() {
|
||||
List<String> list = new ArrayList<String>();
|
||||
list.add(this.getAlternateBase());
|
||||
return list;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLocation;
|
||||
|
|
@ -95,6 +109,12 @@ public class BasicVariation implements Variation {
|
|||
return (mRef);
|
||||
}
|
||||
|
||||
/** are we bi-allelic? */
|
||||
@Override
|
||||
public boolean isBiallelic() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getNegLog10PError() {
|
||||
return mConfidence;
|
||||
|
|
@ -131,9 +151,9 @@ public class BasicVariation implements Variation {
|
|||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
|
||||
// we know that if we're a snp, the reference is a single base, so charAt(0) is safe
|
||||
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBases().charAt(1);
|
||||
return getAlternateBases().charAt(0);
|
||||
if (getAlternateBase().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBase().charAt(1);
|
||||
return getAlternateBase().charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -146,9 +166,9 @@ public class BasicVariation implements Variation {
|
|||
if (!this.isSNP()) throw new IllegalStateException("we're not a SNP");
|
||||
|
||||
// we know that if we're a snp, the reference is a single base, so charAt(0) is safe
|
||||
if (getAlternateBases().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBases().charAt(0);
|
||||
return getAlternateBases().charAt(1);
|
||||
if (getAlternateBase().charAt(0) == this.getReference().charAt(0))
|
||||
return getAlternateBase().charAt(0);
|
||||
return getAlternateBase().charAt(1);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -13,13 +13,16 @@ public interface VariantBackedByGenotype {
|
|||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @param x the genotype
|
||||
* @return a map in lexigraphical order of the likelihoods
|
||||
*/
|
||||
public List<Genotype> getGenotypes();
|
||||
|
||||
/**
|
||||
* get the likelihoods
|
||||
*
|
||||
* @return an array in lexigraphical order of the likelihoods
|
||||
* @return a map in lexigraphical order of the likelihoods
|
||||
*/
|
||||
public Genotype getGenotype(DiploidGenotype x);
|
||||
|
||||
|
||||
/**
|
||||
* do we have the specified genotype? not all backedByGenotypes
|
||||
* have all the genotype data.
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
|
|
@ -12,7 +14,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
public interface Variation {
|
||||
// the types of variants we currently allow
|
||||
public enum VARIANT_TYPE {
|
||||
SNP, INDEL, DELETION, REFERENCE // though reference is not really a variant
|
||||
SNP, INDEL, REFERENCE // though reference is not really a variant
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -60,6 +62,9 @@ public interface Variation {
|
|||
*/
|
||||
public String getReference();
|
||||
|
||||
/** are we bi-allelic? */
|
||||
public boolean isBiallelic();
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
|
|
@ -69,31 +74,43 @@ public interface Variation {
|
|||
|
||||
/**
|
||||
* are we truely a variant, given a reference
|
||||
*
|
||||
* @return false if we're a variant(indel, delete, SNP, etc), true if we're not
|
||||
*/
|
||||
public boolean isReference();
|
||||
|
||||
/**
|
||||
* gets the alternate bases. If this is homref, throws an UnsupportedOperationException
|
||||
* gets the alternate base. Use this method if we're biallelic
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getAlternateBases();
|
||||
public String getAlternateBase();
|
||||
|
||||
/**
|
||||
* gets the alternate bases. Use this method if teh allele count is greater then 2
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<String> getAlternateBases();
|
||||
|
||||
/**
|
||||
* are we an insertion or a deletion? yes, then return true. No? Well, false then.
|
||||
*
|
||||
* @return true if we're an insertion or deletion
|
||||
*/
|
||||
public boolean isIndel();
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
* of
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getAlternativeBaseForSNP();
|
||||
|
||||
/**
|
||||
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getReferenceForSNP();
|
||||
|
|
|
|||
|
|
@ -21,35 +21,33 @@ public class VCFGenotypeRecord {
|
|||
}
|
||||
|
||||
// our phasing
|
||||
private PHASE phaseType;
|
||||
|
||||
// our reference bases(s)
|
||||
private final char mReferenceBase;
|
||||
private PHASE mPhaseType;
|
||||
|
||||
// our bases(s)
|
||||
private final List<String> mAlleleBases = new ArrayList<String>();
|
||||
private final List<String> mGenotypeAlleles = new ArrayList<String>();
|
||||
|
||||
// our mapping of the format mFields to values
|
||||
private final Map<String, String> mFields = new HashMap<String, String>();
|
||||
|
||||
// our sample name
|
||||
private final String mSampleName;
|
||||
|
||||
/**
|
||||
* create a VCF record
|
||||
* Create a VCF genotype record
|
||||
*
|
||||
* @param keyValues the key values
|
||||
* @param Alleles the alleles, one if we're halpoid, two if we're diploid
|
||||
* @param phasing the phasing of the the genotype
|
||||
* @param referenceBase the reference base
|
||||
* @param sampleName
|
||||
* @param genotypes
|
||||
* @param phasing
|
||||
* @param otherFlags
|
||||
*/
|
||||
public VCFGenotypeRecord(String sampleName, Map<String, String> keyValues, List<String> Alleles, PHASE phasing, char referenceBase) {
|
||||
mSampleName = sampleName;
|
||||
mReferenceBase = referenceBase;
|
||||
mFields.putAll(keyValues);
|
||||
mAlleleBases.addAll(Alleles);
|
||||
phaseType = phasing;
|
||||
public VCFGenotypeRecord(String sampleName, List<String> genotypes, PHASE phasing, Map<String, String> otherFlags) {
|
||||
this.mSampleName = sampleName;
|
||||
this.mGenotypeAlleles.addAll(genotypes);
|
||||
this.mPhaseType = phasing;
|
||||
this.mFields.putAll(otherFlags);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* determine the phase of the genotype
|
||||
*
|
||||
|
|
@ -70,18 +68,55 @@ public class VCFGenotypeRecord {
|
|||
/** getter methods */
|
||||
|
||||
public PHASE getPhaseType() {
|
||||
return phaseType;
|
||||
return mPhaseType;
|
||||
}
|
||||
|
||||
public char getReference() {
|
||||
return mReferenceBase;
|
||||
public String getSampleName() {
|
||||
return mSampleName;
|
||||
}
|
||||
|
||||
public List<String> getAllele() {
|
||||
return mAlleleBases;
|
||||
public List<String> getAlleles() {
|
||||
return mGenotypeAlleles;
|
||||
}
|
||||
|
||||
public Map<String, String> getFields() {
|
||||
return mFields;
|
||||
}
|
||||
|
||||
public String toGenotypeString(List<String> altAlleles) {
|
||||
String str = "";
|
||||
boolean first = true;
|
||||
for (String allele : mGenotypeAlleles) {
|
||||
str += String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0);
|
||||
if (first) {
|
||||
switch (mPhaseType) {
|
||||
case UNPHASED:
|
||||
str += "/";
|
||||
break;
|
||||
case PHASED:
|
||||
str += "|";
|
||||
break;
|
||||
case PHASED_SWITCH_PROB:
|
||||
str += "\\";
|
||||
break;
|
||||
case UNKNOWN:
|
||||
throw new UnsupportedOperationException("Unknown phase type");
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof VCFGenotypeRecord) {
|
||||
if (((VCFGenotypeRecord) other).mPhaseType != this.mPhaseType) return false;
|
||||
if (!((VCFGenotypeRecord) other).mGenotypeAlleles.equals(this.mGenotypeAlleles)) return false;
|
||||
if (!((VCFGenotypeRecord) other).mFields.equals(mFields)) return false;
|
||||
if (!((VCFGenotypeRecord) other).mSampleName.equals(this.mSampleName)) return false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -195,9 +195,9 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
genotypeRecords.add(getVCFGenotype(str, mFormatString, tokens[index], values.get(VCFHeader.HEADER_FIELDS.ALT).split(","), values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
|
||||
index++;
|
||||
}
|
||||
return new VCFRecord(mHeader, values, mFormatString, genotypeRecords);
|
||||
return new VCFRecord(values, mFormatString, genotypeRecords);
|
||||
}
|
||||
return new VCFRecord(mHeader, values);
|
||||
return new VCFRecord(values);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -216,7 +216,6 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
Map<String, String> tagToValue = new HashMap<String, String>();
|
||||
VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN;
|
||||
List<String> bases = new ArrayList<String>();
|
||||
int addedCount = 0;
|
||||
String keyStrings[] = formatString.split(":");
|
||||
for (String key : keyStrings) {
|
||||
String parse;
|
||||
|
|
@ -236,19 +235,20 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
addAllele(m.group(1), altAlleles, referenceBase, bases);
|
||||
if (m.group(3).length() > 0) addAllele(m.group(3), altAlleles, referenceBase, bases);
|
||||
}
|
||||
tagToValue.put(key, parse);
|
||||
addedCount++;
|
||||
else {
|
||||
tagToValue.put(key, parse);
|
||||
}
|
||||
if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1;
|
||||
genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length());
|
||||
}
|
||||
// catch some common errors, either there are too many field keys or there are two many field values
|
||||
if (keyStrings.length != tagToValue.size())
|
||||
if (keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0))
|
||||
throw new RuntimeException("VCFReader: genotype value count doesn't match the key count (expected "
|
||||
+ keyStrings.length + " but saw " + tagToValue.size() + ")");
|
||||
else if (genotypeString.length() > 0)
|
||||
throw new RuntimeException("VCFReader: genotype string contained additional unprocessed fields: " + genotypeString
|
||||
+ ". This most likely means that the format string is shorter then the value fields.");
|
||||
return new VCFGenotypeRecord(sampleName, tagToValue, bases, phase, referenceBase);
|
||||
return new VCFGenotypeRecord(sampleName, bases, phase, tagToValue);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
|
@ -8,90 +10,165 @@ import java.util.Map;
|
|||
|
||||
/** the basic VCF record type */
|
||||
public class VCFRecord {
|
||||
// required field values
|
||||
private final Map<VCFHeader.HEADER_FIELDS, String> mValues = new HashMap<VCFHeader.HEADER_FIELDS, String>();
|
||||
public static final String FIELD_SEPERATOR = "\t";
|
||||
// the reference base
|
||||
private char mReferenceBase;
|
||||
// our contig
|
||||
private String mChrome;
|
||||
// our position
|
||||
private int mPosition;
|
||||
// our id; set to '.' if not available
|
||||
private String mID;
|
||||
// the alternate bases
|
||||
private final List<String> mAlts = new ArrayList<String>();
|
||||
// our qual value
|
||||
private int mQual;
|
||||
// our filter string
|
||||
private String mFilterString;
|
||||
// our info fields
|
||||
private final Map<String, String> mInfoFields = new HashMap<String, String>();
|
||||
|
||||
private final String mGenotypeFormatString;
|
||||
|
||||
// our genotype sample fields
|
||||
private final List<VCFGenotypeRecord> mGenotypeFields;
|
||||
|
||||
// the format String, which specifies what each genotype can contain for values
|
||||
private final String mFormatString;
|
||||
|
||||
// the associated header
|
||||
private final VCFHeader mHeader;
|
||||
private final List<VCFGenotypeRecord> mGenotypeFields = new ArrayList<VCFGenotypeRecord>();
|
||||
|
||||
/**
|
||||
* given a VCF header, and the values for each of the columns, create a VCF record.
|
||||
*
|
||||
* @param header the VCF header
|
||||
* @param columnValues a mapping of header strings to values
|
||||
* @param columnValues a mapping of header strings to values
|
||||
* @param formatString the format string for the genotype records
|
||||
* @param genotypeRecords the genotype records
|
||||
*/
|
||||
public VCFRecord(VCFHeader header, Map<VCFHeader.HEADER_FIELDS, String> columnValues, String formatString, List<VCFGenotypeRecord> genotypeRecords) {
|
||||
mHeader = header;
|
||||
mValues.putAll(columnValues);
|
||||
mFormatString = formatString;
|
||||
mGenotypeFields = new ArrayList<VCFGenotypeRecord>();
|
||||
public VCFRecord(Map<VCFHeader.HEADER_FIELDS, String> columnValues, String formatString, List<VCFGenotypeRecord> genotypeRecords) {
|
||||
extractFields(columnValues);
|
||||
mGenotypeFields.addAll(genotypeRecords);
|
||||
mGenotypeFormatString = formatString;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF record
|
||||
*
|
||||
* @param referenceBase the reference base to use
|
||||
* @param contig the contig this variant is on
|
||||
* @param position our position
|
||||
* @param ID our ID string
|
||||
* @param altBases the list of alternate bases
|
||||
* @param qual the qual field
|
||||
* @param filters the filters used on this variant
|
||||
* @param infoFields the information fields
|
||||
* @param genotypeObjects the genotype objects
|
||||
*/
|
||||
public VCFRecord(char referenceBase,
|
||||
String contig,
|
||||
int position,
|
||||
String ID,
|
||||
List<String> altBases,
|
||||
int qual,
|
||||
String filters,
|
||||
Map<String, String> infoFields,
|
||||
String genotypeFormatString,
|
||||
List<VCFGenotypeRecord> genotypeObjects) {
|
||||
this.setReferenceBase(referenceBase);
|
||||
this.mChrome = contig;
|
||||
this.setPosition(position);
|
||||
this.mID = ID;
|
||||
for (String alt : altBases)
|
||||
this.addAlternateBase(alt);
|
||||
this.setQual(qual);
|
||||
this.setFilterString(filters);
|
||||
this.mInfoFields.putAll(infoFields);
|
||||
mGenotypeFormatString = genotypeFormatString;
|
||||
this.mGenotypeFields.addAll(genotypeObjects);
|
||||
}
|
||||
|
||||
/**
|
||||
* given a VCF header, and the values for each of the columns, create a VCF record.
|
||||
*
|
||||
* @param header the VCF header
|
||||
* @param columnValues a mapping of header strings to values
|
||||
* @param columnValues a mapping of header strings to values
|
||||
*/
|
||||
public VCFRecord(VCFHeader header, Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
|
||||
mHeader = header;
|
||||
mValues.putAll(columnValues);
|
||||
mGenotypeFields = null;
|
||||
mFormatString = null;
|
||||
public VCFRecord(Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
|
||||
extractFields(columnValues);
|
||||
mGenotypeFormatString = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* extract the field values from the passed in array
|
||||
*
|
||||
* @param columnValues a map of the header fields to values
|
||||
*/
|
||||
private void extractFields(Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
|
||||
for (VCFHeader.HEADER_FIELDS val : columnValues.keySet()) {
|
||||
switch (val) {
|
||||
case CHROM:
|
||||
this.setChomosome(columnValues.get(val));
|
||||
break;
|
||||
case POS:
|
||||
this.setPosition(Integer.valueOf(columnValues.get(val)));
|
||||
break;
|
||||
case ID:
|
||||
this.setID(columnValues.get(val));
|
||||
break;
|
||||
case REF:
|
||||
if (columnValues.get(val).length() != 1)
|
||||
throw new IllegalArgumentException("Reference base should be a single character");
|
||||
this.setReferenceBase(columnValues.get(val).charAt(0));
|
||||
break;
|
||||
case ALT:
|
||||
String values[] = columnValues.get(val).split(",");
|
||||
for (String alt : values)
|
||||
addAlternateBase(alt);
|
||||
break;
|
||||
case QUAL:
|
||||
this.setQual(Integer.valueOf(columnValues.get(val)));
|
||||
break;
|
||||
case FILTER:
|
||||
this.setFilterString(columnValues.get(val));
|
||||
break;
|
||||
case INFO:
|
||||
String vals[] = columnValues.get(val).split(";");
|
||||
for (String alt : vals) {
|
||||
String keyVal[] = alt.split("=");
|
||||
if (keyVal.length == 1 && keyVal[0].equals(".")) {
|
||||
this.addInfoField(keyVal[0], "");
|
||||
break;
|
||||
}
|
||||
if (keyVal.length != 2)
|
||||
throw new IllegalArgumentException("info field key-value pair did not parse into key->value pair: " + alt);
|
||||
this.addInfoField(keyVal[0], keyVal[1]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* do we have genotyping data
|
||||
*
|
||||
* @return true if we have genotyping data, false otherwise
|
||||
*/
|
||||
|
||||
public boolean hasGenotypeData() {
|
||||
if (mGenotypeFields==null) {
|
||||
if (mGenotypeFields.size() < 1) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the format string
|
||||
* @return the format sting, null if it doesn't exist
|
||||
*/
|
||||
public String getFormatString() {
|
||||
return mFormatString;
|
||||
}
|
||||
|
||||
/**
|
||||
* get a required field, given the field tag
|
||||
*
|
||||
* @param field the key for the field
|
||||
*
|
||||
* @return the field value
|
||||
*/
|
||||
public String getValue(VCFHeader.HEADER_FIELDS field) {
|
||||
return mValues.get(field);
|
||||
}
|
||||
|
||||
/** @return the string for the chromosome that this VCF record is associated with */
|
||||
public String getChromosome() {
|
||||
return mValues.get(VCFHeader.HEADER_FIELDS.CHROM);
|
||||
return this.mChrome;
|
||||
}
|
||||
|
||||
|
||||
/** @return this VCF records position on the specified chromosome */
|
||||
public long getPosition() {
|
||||
return Long.valueOf(this.mValues.get(VCFHeader.HEADER_FIELDS.POS));
|
||||
return this.mPosition;
|
||||
}
|
||||
|
||||
/** @return the ID value for this record */
|
||||
public String getID() {
|
||||
return mValues.get(VCFHeader.HEADER_FIELDS.ID);
|
||||
return this.mID;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -100,8 +177,7 @@ public class VCFRecord {
|
|||
* @return either A, T, C, G, or N
|
||||
*/
|
||||
public char getReferenceBase() {
|
||||
// TODO: this field isn't validated correctly
|
||||
return mValues.get(VCFHeader.HEADER_FIELDS.REF).charAt(0);
|
||||
return this.mReferenceBase;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -109,20 +185,17 @@ public class VCFRecord {
|
|||
*
|
||||
* @return an array of strings representing the alt alleles, or null if there are none
|
||||
*/
|
||||
public String[] getAlternateAlleles() {
|
||||
if (mValues.get(VCFHeader.HEADER_FIELDS.ALT).trim().equals(".")) {
|
||||
return null;
|
||||
}
|
||||
return mValues.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
|
||||
public List<String> getAlternateAlleles() {
|
||||
return this.mAlts;
|
||||
}
|
||||
|
||||
public boolean hasAlternateAllele() {
|
||||
return getAlternateAlleles() != null;
|
||||
return getAlternateAlleles().size() > 0;
|
||||
}
|
||||
|
||||
/** @return the phred-scaled quality score */
|
||||
public int getQual() {
|
||||
return Integer.valueOf(mValues.get(VCFHeader.HEADER_FIELDS.QUAL));
|
||||
return this.mQual;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -131,10 +204,10 @@ public class VCFRecord {
|
|||
* @return an array of strings representing the filtering criteria, or null if none were applied
|
||||
*/
|
||||
public String[] getFilteringCodes() {
|
||||
if (mValues.get(VCFHeader.HEADER_FIELDS.FILTER).trim().equals("0")) {
|
||||
return null;
|
||||
}
|
||||
return mValues.get(VCFHeader.HEADER_FIELDS.ALT).split(";");
|
||||
//if (this.mFilterString.equals("0")) {
|
||||
// return null;
|
||||
//}
|
||||
return this.mFilterString.split(";");
|
||||
}
|
||||
|
||||
public boolean hasFilteringCodes() {
|
||||
|
|
@ -147,25 +220,18 @@ public class VCFRecord {
|
|||
* @return a map, of the info key-value pairs
|
||||
*/
|
||||
public Map<String, String> getInfoValues() {
|
||||
Map<String, String> ret = new HashMap<String, String>();
|
||||
String infoSplit[] = mValues.get(VCFHeader.HEADER_FIELDS.INFO).split(";");
|
||||
for (String s : infoSplit) {
|
||||
String keyValue[] = s.split("=");
|
||||
if (keyValue.length != 2)
|
||||
throw new RuntimeException("Key value pairs must have both a key and a value; pair: " + s);
|
||||
ret.put(keyValue[0], keyValue[1]);
|
||||
}
|
||||
return ret;
|
||||
return this.mInfoFields;
|
||||
}
|
||||
|
||||
/** @return the number of columnsof data we're storing */
|
||||
public int getColumnCount() {
|
||||
if (this.hasGenotypeData()) return mGenotypeFields.size() + mValues.size();
|
||||
return mValues.size();
|
||||
if (this.hasGenotypeData()) return mGenotypeFields.size() + VCFHeader.HEADER_FIELDS.values().length;
|
||||
return VCFHeader.HEADER_FIELDS.values().length;
|
||||
}
|
||||
|
||||
/**
|
||||
* return the mapping of the format tags to the specified sample's values
|
||||
*
|
||||
* @return a VCFGenotypeRecord
|
||||
*/
|
||||
public List<VCFGenotypeRecord> getVCFGenotypeRecords() {
|
||||
|
|
@ -174,9 +240,130 @@ public class VCFRecord {
|
|||
|
||||
/** @return a List of the sample names */
|
||||
public String[] getSampleNames() {
|
||||
String ret[] = new String[mHeader.getGenotypeSamples().size()];
|
||||
mHeader.getGenotypeSamples().toArray(ret);
|
||||
return ret;
|
||||
String names[] = new String[mGenotypeFields.size()];
|
||||
int index = 0;
|
||||
for (VCFGenotypeRecord rec : this.mGenotypeFields)
|
||||
names[index++] = rec.getSampleName();
|
||||
return names;
|
||||
}
|
||||
|
||||
public String getGenotypeFormatString() {
|
||||
return mGenotypeFormatString;
|
||||
}// the formatting string for our genotype records
|
||||
|
||||
public void setReferenceBase(char referenceBase) {
|
||||
// upppercase the character
|
||||
referenceBase = (char) ((referenceBase > 96) ? referenceBase - 32 : referenceBase);
|
||||
if (referenceBase != 'A' && referenceBase != 'C' && referenceBase != 'T' && referenceBase != 'G' && referenceBase != 'N')
|
||||
throw new IllegalArgumentException("Reference base must be one of A,C,G,T,N, we saw: " + referenceBase);
|
||||
this.mReferenceBase = referenceBase;
|
||||
}
|
||||
|
||||
public void setPosition(int mPosition) {
|
||||
if (mPosition < 0)
|
||||
throw new IllegalArgumentException("Position values must be greater than 0");
|
||||
this.mPosition = mPosition;
|
||||
}
|
||||
|
||||
public void setChomosome(String mChrome) {
|
||||
this.mChrome = mChrome;
|
||||
}
|
||||
|
||||
public void setID(String mID) {
|
||||
this.mID = mID;
|
||||
}
|
||||
|
||||
public void setQual(int mQual) {
|
||||
if (mQual < 0)
|
||||
throw new IllegalArgumentException("Qual values must be greater than 0");
|
||||
this.mQual = mQual;
|
||||
}
|
||||
|
||||
public void setFilterString(String mFilterString) {
|
||||
this.mFilterString = mFilterString;
|
||||
}
|
||||
|
||||
public void addGenotypeFields(VCFGenotypeRecord mGenotypeFields) {
|
||||
this.mGenotypeFields.add(mGenotypeFields);
|
||||
}
|
||||
|
||||
public void addAlternateBase(String base) {
|
||||
if (base.length() == 1) {
|
||||
char nuc = (char) ((base.charAt(0) > 96) ? base.charAt(0) - 32 : base.charAt(0));
|
||||
if (nuc != 'A' && nuc != 'C' && nuc != 'T' && nuc != 'G' && nuc != '.')
|
||||
throw new IllegalArgumentException("Alternate base must be either A,C,T,G,. or if an indel it must contain length information: " + base);
|
||||
} else {
|
||||
// we must be an indel, check that the first character is I or D
|
||||
char nuc = (char) ((base.charAt(0) > 96) ? base.charAt(0) - 32 : base.charAt(0));
|
||||
if (nuc != 'I' && nuc != 'D')
|
||||
throw new IllegalArgumentException("Alternate bases of length greater then one must be an indel: " + base);
|
||||
}
|
||||
this.mAlts.add(base);
|
||||
}
|
||||
|
||||
public void addInfoField(String key, String value) {
|
||||
this.mInfoFields.put(key, value);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
// else builder.append(FIELD_SEPERATOR + record.getValue(field));
|
||||
// CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
|
||||
builder.append(getChromosome() + FIELD_SEPERATOR);
|
||||
builder.append(getPosition() + FIELD_SEPERATOR);
|
||||
builder.append(getID() + FIELD_SEPERATOR);
|
||||
builder.append(getReferenceBase() + FIELD_SEPERATOR);
|
||||
String alts = "";
|
||||
for (String str : this.getAlternateAlleles()) alts += str + ",";
|
||||
builder.append(alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR);
|
||||
builder.append(getQual() + FIELD_SEPERATOR);
|
||||
builder.append(Utils.join(";", getFilteringCodes()) + FIELD_SEPERATOR);
|
||||
String info = "";
|
||||
for (String str : this.getInfoValues().keySet()) {
|
||||
if (str.equals("."))
|
||||
info = ".";
|
||||
else
|
||||
info += str + "=" + getInfoValues().get(str) + ";";
|
||||
}
|
||||
|
||||
if (info.length() > 1) builder.append(info.substring(0, info.length() - 1));
|
||||
else builder.append(info);
|
||||
|
||||
if (this.hasGenotypeData()) {
|
||||
builder.append(FIELD_SEPERATOR + this.getGenotypeFormatString());
|
||||
for (VCFGenotypeRecord rec : this.getVCFGenotypeRecords()) {
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
if (!rec.toGenotypeString(this.mAlts).equals(""))
|
||||
builder.append(rec.toGenotypeString(this.mAlts));
|
||||
for (String s : rec.getFields().keySet()) {
|
||||
if (rec.getFields().get(s).equals("")) continue;
|
||||
builder.append(":");
|
||||
builder.append(rec.getFields().get(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* compare two VCF records
|
||||
*
|
||||
* @param other the other VCF record
|
||||
*
|
||||
* @return true if they're equal
|
||||
*/
|
||||
public boolean equals(VCFRecord other) {
|
||||
if (!this.mAlts.equals(other.mAlts)) return false;
|
||||
if (this.mReferenceBase != other.mReferenceBase) return false;
|
||||
if (!this.mChrome.equals(other.mChrome)) return false;
|
||||
if (this.mPosition != other.mPosition) return false;
|
||||
if (!this.mID.equals(other.mID)) return false;
|
||||
if (this.mQual != other.mQual) return false;
|
||||
if (!this.mFilterString.equals(other.mFilterString)) return false;
|
||||
if (!this.mInfoFields.equals(other.mInfoFields)) return false;
|
||||
if (!this.mGenotypeFields.equals(other.mGenotypeFields)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,33 +61,13 @@ public class VCFWriter {
|
|||
throw new RuntimeException("Record has " + record.getColumnCount() +
|
||||
" columns, when is should have " + mHeader.getColumnCount());
|
||||
}
|
||||
StringBuilder builder = new StringBuilder();
|
||||
String vcfString = record.toString();
|
||||
try {
|
||||
mWriter.write(vcfString + "\n");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to write the VCF object to a file");
|
||||
}
|
||||
|
||||
// first output the required fields in order
|
||||
boolean first = true;
|
||||
for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields()) {
|
||||
if (first) {
|
||||
first = false;
|
||||
builder.append(record.getValue(field));
|
||||
} else builder.append(FIELD_SEPERATOR + record.getValue(field));
|
||||
}
|
||||
if (record.hasGenotypeData()) {
|
||||
builder.append(FIELD_SEPERATOR + record.getFormatString());
|
||||
for (VCFGenotypeRecord rec : record.getVCFGenotypeRecords()) {
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
boolean ft = true;
|
||||
for (String s : rec.getFields().keySet()) {
|
||||
if (!ft) builder.append(":");
|
||||
else ft = true;
|
||||
builder.append(rec.getFields().get(s));
|
||||
}
|
||||
}
|
||||
try {
|
||||
mWriter.write(builder.toString() + "\n");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to write the VCF object to a file");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** attempt to close the VCF file */
|
||||
|
|
|
|||
|
|
@ -0,0 +1,179 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.junit.Assert;
|
||||
import static org.junit.Assert.fail;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class RodVCFTest
|
||||
* <p/>
|
||||
* test out the rod VCF
|
||||
*/
|
||||
public class RodVCFTest extends BaseTest {
|
||||
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
private static File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf");
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
}
|
||||
|
||||
private RodVCF getVCFObject() {
|
||||
RodVCF vcf = new RodVCF("VCF");
|
||||
VCFHeader header = null;
|
||||
try {
|
||||
header = (VCFHeader) vcf.initialize(vcfFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
fail("Unable to open VCF file");
|
||||
}
|
||||
header.checkVCFVersion();
|
||||
return vcf;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInitialize() {
|
||||
getVCFObject();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToIterator() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
VCFReader reader = new VCFReader(vcfFile);
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
while (iter.hasNext()) {
|
||||
VCFRecord rec1 = reader.next();
|
||||
VCFRecord rec2 = iter.next().mCurrentRecord;
|
||||
if (!rec1.equals(rec2)) {
|
||||
fail("VCF record rec1 != rec2");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToMAF() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.786, 0.00001);
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.0, 0.00001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToString() {
|
||||
// slightly altered line, due to map ordering
|
||||
String firstLine = "20\t14370\trs6054257\tG\tA\t29\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n";
|
||||
RodVCF vcf = getVCFObject();
|
||||
VCFReader reader = new VCFReader(vcfFile);
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
boolean first = true;
|
||||
while (iter.hasNext()) {
|
||||
VCFRecord rec1 = reader.next();
|
||||
VCFRecord rec2 = iter.next().mCurrentRecord;
|
||||
if (!rec1.toString().equals(rec2.toString())) {
|
||||
fail("VCF record rec1.toString() != rec2.toString()");
|
||||
}
|
||||
// verify the first line too
|
||||
if (first) {
|
||||
if (!firstLine.equals(rec1.toString() + "\n")) {
|
||||
fail("VCF record rec1.toString() != expected string :\n" + rec1.toString() + firstLine);
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testType() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
Assert.assertTrue(rec.isSNP());
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
Assert.assertTrue(rec.isIndel());
|
||||
Assert.assertTrue(rec.isInsertion());
|
||||
Assert.assertTrue(rec.isDeletion());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetGenotypes() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
// we should get back a ref 'G' and an alt 'A'
|
||||
List<Genotype> list = rec.getGenotypes();
|
||||
List<String> knowngenotypes = new ArrayList<String>();
|
||||
knowngenotypes.add("GG");
|
||||
knowngenotypes.add("AG");
|
||||
knowngenotypes.add("AA");
|
||||
Assert.assertEquals(3, list.size());
|
||||
for (Genotype g: list) {
|
||||
Assert.assertTrue(knowngenotypes.contains(g.getBases()));
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void testGetGenotypesQual() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
// we should get back a ref 'G' and an alt 'A'
|
||||
List<Genotype> list = rec.getGenotypes();
|
||||
Assert.assertEquals(4.8,list.get(0).getNegLog10PError(),0.0001);
|
||||
Assert.assertEquals(4.8,list.get(1).getNegLog10PError(),0.0001);
|
||||
Assert.assertEquals(4.3,list.get(2).getNegLog10PError(),0.0001);
|
||||
}
|
||||
@Test
|
||||
public void testHasGenotypes() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
Assert.assertTrue(rec.hasGenotype(DiploidGenotype.valueOf("GG")));
|
||||
Assert.assertTrue(rec.hasGenotype(DiploidGenotype.valueOf("AG")));
|
||||
Assert.assertTrue(rec.hasGenotype(DiploidGenotype.valueOf("AA")));
|
||||
Assert.assertTrue(!rec.hasGenotype(DiploidGenotype.valueOf("TT")));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetLocation() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
GenomeLoc loc = rec.getLocation();
|
||||
Assert.assertEquals(14370,loc.getStart());
|
||||
Assert.assertEquals(14370,loc.getStop());
|
||||
Assert.assertTrue(loc.getContig().equals("20"));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -8,64 +8,63 @@ import java.util.Arrays;
|
|||
public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||
@Test
|
||||
public void testIntervals() {
|
||||
|
||||
String[] md5DoC = {"5f176d27eba04c8e08b54b60fd96758e", "21c8e1f9dc65fdfb39347547f9b04011"};
|
||||
String[] md5DoC = {"eada262e03738876a2b5b94d56f0951e", "21c8e1f9dc65fdfb39347547f9b04011"};
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X DepthOfCoverage:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5DoC));
|
||||
executeTest("testDoCFilter", spec1);
|
||||
|
||||
String[] md5AlleleBalance = {"dfa47f9465214445b737ec49025d6b5f", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
|
||||
String[] md5AlleleBalance = {"0ade6c97a46245a65558d115ef1e3956", "a13e4ce6260bf9f33ca99dc808b8e6ad"};
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X AlleleBalance:low=0.25,high=0.75 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5AlleleBalance));
|
||||
executeTest("testAlleleBalanceFilter", spec2);
|
||||
|
||||
String[] md5Strand = {"2d32ddd6daccd08f3df148f4d373fa5e", "0f7db0aad764268ee8fa3b857df8d87d"};
|
||||
String[] md5Strand = {"1a9568e3f7e88483e7fbc5f981a7973c", "0f7db0aad764268ee8fa3b857df8d87d"};
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X FisherStrand:pvalue=0.0001 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Strand));
|
||||
executeTest("testStrandFilter", spec3);
|
||||
|
||||
String[] md5Lod = {"157e7beab140f4252ed41ebfc9a508f2", "7e0c4f2b0fda85fd2891eee76c396a55"};
|
||||
String[] md5Lod = {"64bc742d0f44a7279ac6c4ad5c28da28", "7e0c4f2b0fda85fd2891eee76c396a55"};
|
||||
WalkerTestSpec spec4 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X LodThreshold:lod=10 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Lod));
|
||||
executeTest("testLodFilter", spec4);
|
||||
|
||||
String[] md5MQ0 = {"15738f3717dfa3192566a7ddf301fe11", "3203de335621851bccf596242b079e23"};
|
||||
String[] md5MQ0 = {"1201176efca694e2566a9de1508517b8", "3203de335621851bccf596242b079e23"};
|
||||
WalkerTestSpec spec5 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X MappingQualityZero:max=70 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5MQ0));
|
||||
executeTest("testMappingQuality0Filter", spec5);
|
||||
|
||||
String[] md5MQ = {"e7fb1f166d2359f653f98f8215215aca", "ecc777feedea61f7b570d114c2ab89b1"};
|
||||
String[] md5MQ = {"ed922e58c856e1bc3f125b635dea48fc", "ecc777feedea61f7b570d114c2ab89b1"};
|
||||
WalkerTestSpec spec6 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X MappingQuality:min=20 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5MQ));
|
||||
executeTest("testRMSMappingQualityFilter", spec6);
|
||||
|
||||
String[] md5OnOff = {"fe4acc1505f19480f275d7a96b0e3ddb", "67f2e1bc025833b0fa31f47195198997"};
|
||||
String[] md5OnOff = {"8a00c5ad80c43e468186ce0147553048", "67f2e1bc025833b0fa31f47195198997"};
|
||||
WalkerTestSpec spec7 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X OnOffGenotypeRatio:threshold=0.9 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5OnOff));
|
||||
executeTest("testOnOffGenotypeFilter", spec7);
|
||||
|
||||
String[] md5Clusters = {"f5c7c5da0198c4aaafdfcfb3c356eedc", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
|
||||
String[] md5Clusters = {"ee9d8039490354cc7f2c574b5d10c7d8", "8fa6b6ffc93ee7fb8d6b52a7fb7815ef"};
|
||||
WalkerTestSpec spec8 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X ClusteredSnps:window=10,snps=3 -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
Arrays.asList(md5Clusters));
|
||||
executeTest("testClusteredSnpsFilter", spec8);
|
||||
|
||||
String[] md5Indels = {"30bf4c764f6dfd006d919ecaceee0166", "8e0e915a1cb63d7049e0671ed00101fe"};
|
||||
String[] md5Indels = {"90eb46f8d2bf4c0a72716e35c914839d", "8e0e915a1cb63d7049e0671ed00101fe"};
|
||||
WalkerTestSpec spec9 = new WalkerTestSpec(
|
||||
"-T VariantFiltration -X IndelArtifact -B indels,PointIndel,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.indels -B cleaned,CleanedOutSNP,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.realigner_badsnps -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-11,000,000 -B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.chr1_10mb_11mb.slx.geli.calls -D /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -vcf %s -included %s -sample NA12878",
|
||||
2,
|
||||
|
|
|
|||
|
|
@ -41,11 +41,10 @@ public class VCFReaderTest extends BaseTest {
|
|||
char referenceBase = 'N';
|
||||
VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase);
|
||||
Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType());
|
||||
Assert.assertEquals(referenceBase,rec.getReference());
|
||||
Assert.assertEquals("N",rec.getAllele().get(0));
|
||||
Assert.assertEquals("A",rec.getAllele().get(1));
|
||||
Assert.assertEquals("N",rec.getAlleles().get(0));
|
||||
Assert.assertEquals("A",rec.getAlleles().get(1));
|
||||
Map<String,String> values = rec.getFields();
|
||||
Assert.assertEquals(4,values.size());
|
||||
Assert.assertEquals(3,values.size());
|
||||
Assert.assertTrue(values.get("B").equals("2"));
|
||||
Assert.assertTrue(values.get("C").equals("3"));
|
||||
Assert.assertTrue(values.get("D").equals("4"));
|
||||
|
|
@ -63,11 +62,10 @@ public class VCFReaderTest extends BaseTest {
|
|||
char referenceBase = 'N';
|
||||
VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase);
|
||||
Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType());
|
||||
Assert.assertEquals(referenceBase,rec.getReference());
|
||||
Assert.assertEquals("N",rec.getAllele().get(0));
|
||||
Assert.assertEquals("A",rec.getAllele().get(1));
|
||||
Assert.assertEquals("N",rec.getAlleles().get(0));
|
||||
Assert.assertEquals("A",rec.getAlleles().get(1));
|
||||
Map<String,String> values = rec.getFields();
|
||||
Assert.assertEquals(4,values.size());
|
||||
Assert.assertEquals(3,values.size());
|
||||
Assert.assertTrue(values.get("B").equals(""));
|
||||
Assert.assertTrue(values.get("C").equals(""));
|
||||
Assert.assertTrue(values.get("D").equals("4"));
|
||||
|
|
@ -84,11 +82,10 @@ public class VCFReaderTest extends BaseTest {
|
|||
char referenceBase = 'N';
|
||||
VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase);
|
||||
Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType());
|
||||
Assert.assertEquals(referenceBase,rec.getReference());
|
||||
Assert.assertEquals("N",rec.getAllele().get(0));
|
||||
Assert.assertEquals("A",rec.getAllele().get(1));
|
||||
Assert.assertEquals("N",rec.getAlleles().get(0));
|
||||
Assert.assertEquals("A",rec.getAlleles().get(1));
|
||||
Map<String,String> values = rec.getFields();
|
||||
Assert.assertEquals(4,values.size());
|
||||
Assert.assertEquals(3,values.size());
|
||||
Assert.assertTrue(values.get("B").equals(""));
|
||||
Assert.assertTrue(values.get("C").equals(""));
|
||||
Assert.assertTrue(values.get("D").equals(""));
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class VCFRecordTest
|
||||
*
|
||||
* test the basic functionality of the vcf record
|
||||
*/
|
||||
public class VCFRecordTest extends BaseTest {
|
||||
|
||||
private VCFRecord makeFakeVCFRecord() {
|
||||
List<String> altBases = new ArrayList<String>();
|
||||
altBases.add("C");
|
||||
altBases.add("D1");
|
||||
Map<String,String> infoFields = new HashMap<String,String>();
|
||||
infoFields.put("DP","50");
|
||||
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
|
||||
Map<String, String> keyValues = new HashMap<String,String>();
|
||||
keyValues.put("AA","2");
|
||||
List<String> Alleles = new ArrayList<String>();
|
||||
Alleles.add("A");
|
||||
genotypeObjects.add(new VCFGenotypeRecord("SampleName", Alleles, VCFGenotypeRecord.PHASE.PHASED, keyValues));
|
||||
return new VCFRecord('A',"chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",genotypeObjects);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testGetGenotypes() {
|
||||
VCFRecord rec = makeFakeVCFRecord();
|
||||
List<VCFGenotypeRecord> genotypeObjects = rec.getVCFGenotypeRecords();
|
||||
Assert.assertEquals(1,genotypeObjects.size());
|
||||
Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("SampleName"));
|
||||
}
|
||||
}
|
||||
|
|
@ -60,18 +60,24 @@ public class VCFWriterTest extends BaseTest {
|
|||
* @return a VCFRecord
|
||||
*/
|
||||
private VCFRecord createVCFRecord(VCFHeader header) {
|
||||
Map<VCFHeader.HEADER_FIELDS,String> map = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values())
|
||||
map.put(field,String.valueOf(1));
|
||||
List<String> altBases = new ArrayList<String>();
|
||||
altBases.add("C");
|
||||
altBases.add("D1");
|
||||
Map<String,String> infoFields = new HashMap<String,String>();
|
||||
infoFields.put("DP","50");
|
||||
|
||||
List<VCFGenotypeRecord> gt = new ArrayList<VCFGenotypeRecord>();
|
||||
for (String name : header.getGenotypeSamples()) {
|
||||
Map<String,String> str = new HashMap<String,String>();
|
||||
str.put("key","0|0");
|
||||
List<String> alleles = new ArrayList<String>();
|
||||
alleles.add("AAA");
|
||||
gt.add(new VCFGenotypeRecord(name,str,alleles, VCFGenotypeRecord.PHASE.PHASED,'A'));
|
||||
str.put("bb","0");
|
||||
|
||||
List<String> myAlleles = new ArrayList<String>();
|
||||
myAlleles.add("C");
|
||||
myAlleles.add("D1");
|
||||
gt.add(new VCFGenotypeRecord(name, myAlleles, VCFGenotypeRecord.PHASE.PHASED, str));
|
||||
}
|
||||
return new VCFRecord(header,map,"GT",gt);
|
||||
return new VCFRecord('A',"chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue