2009-09-19 04:19:34 +08:00
|
|
|
package org.broadinstitute.sting.gatk.refdata;
|
|
|
|
|
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.StingException;
|
2009-10-16 12:11:34 +08:00
|
|
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
2009-09-19 04:19:34 +08:00
|
|
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
2009-10-16 12:11:34 +08:00
|
|
|
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
2009-11-01 13:35:47 +08:00
|
|
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
2009-09-19 04:19:34 +08:00
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.io.FileNotFoundException;
|
|
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.util.Iterator;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @author aaron
|
|
|
|
|
* <p/>
|
|
|
|
|
* Class RodVCF
|
|
|
|
|
* <p/>
|
|
|
|
|
* An implementation of the ROD for VCF.
|
|
|
|
|
*/
|
2009-09-24 02:24:05 +08:00
|
|
|
public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, Iterator<RodVCF> {
|
2009-09-19 04:19:34 +08:00
|
|
|
// our VCF related information
|
|
|
|
|
private VCFReader mReader;
|
|
|
|
|
public VCFRecord mCurrentRecord;
|
|
|
|
|
|
|
|
|
|
public RodVCF(String name) {
|
|
|
|
|
super(name);
|
|
|
|
|
}
|
|
|
|
|
|
2009-09-30 05:28:21 +08:00
|
|
|
private RodVCF(String name, VCFRecord currentRecord, VCFReader reader) {
|
|
|
|
|
super(name);
|
|
|
|
|
mCurrentRecord = currentRecord;
|
|
|
|
|
mReader = reader;
|
|
|
|
|
}
|
|
|
|
|
|
2009-09-19 04:19:34 +08:00
|
|
|
public void assertNotNull() {
|
2009-12-06 14:48:03 +08:00
|
|
|
if ( mCurrentRecord == null ) {
|
2009-09-19 04:19:34 +08:00
|
|
|
throw new UnsupportedOperationException("The current Record is null");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2009-12-06 14:48:03 +08:00
|
|
|
public void assertBiAllelic() {
|
|
|
|
|
if ( !isBiallelic() )
|
|
|
|
|
throw new StingException("This VCF rod is not bi-allelic.");
|
|
|
|
|
}
|
|
|
|
|
|
2009-09-19 04:19:34 +08:00
|
|
|
@Override
|
|
|
|
|
public boolean parseLine(Object header, String[] parts) throws IOException {
|
2009-12-06 14:48:03 +08:00
|
|
|
throw new UnsupportedOperationException("RodVCF does not support the parseLine method");
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public Object initialize(final File source) throws FileNotFoundException {
|
2009-12-06 14:48:03 +08:00
|
|
|
if ( mReader == null )
|
|
|
|
|
mReader = new VCFReader(source);
|
2009-09-19 04:19:34 +08:00
|
|
|
return mReader.getHeader();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public String toString() {
|
2009-12-06 14:48:03 +08:00
|
|
|
return (mCurrentRecord != null ? mCurrentRecord.toStringEncoding(mReader.getHeader()) : "");
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-30 05:28:21 +08:00
|
|
|
public static RodVCF createIterator(String name, File file) {
|
2009-09-19 04:19:34 +08:00
|
|
|
RodVCF vcf = new RodVCF(name);
|
|
|
|
|
try {
|
|
|
|
|
vcf.initialize(file);
|
|
|
|
|
} catch (FileNotFoundException e) {
|
|
|
|
|
throw new StingException("Unable to find file " + file);
|
|
|
|
|
}
|
|
|
|
|
return vcf;
|
|
|
|
|
}
|
|
|
|
|
|
2009-11-09 12:20:35 +08:00
|
|
|
public boolean hasNonRefAlleleFrequency() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getNonRefAlleleFrequency() > 0.0;
|
2009-11-09 12:20:35 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-19 04:19:34 +08:00
|
|
|
/**
|
|
|
|
|
* get the frequency of this variant
|
|
|
|
|
*
|
|
|
|
|
* @return VariantFrequency with the stored frequency
|
|
|
|
|
*/
|
|
|
|
|
public double getNonRefAlleleFrequency() {
|
|
|
|
|
assertNotNull();
|
2009-12-06 14:48:03 +08:00
|
|
|
return mCurrentRecord.getNonRefAlleleFrequency();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
2009-11-09 12:20:35 +08:00
|
|
|
public boolean hasStrandBias() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
2009-11-09 12:20:35 +08:00
|
|
|
return this.mCurrentRecord.getInfoValues().containsKey("SB");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* get the strand bias of this variant
|
|
|
|
|
*
|
|
|
|
|
* @return StrandBias with the stored slod
|
|
|
|
|
*/
|
|
|
|
|
public double getStrandBias() {
|
2009-12-06 14:48:03 +08:00
|
|
|
return hasStrandBias() ? Double.valueOf(this.mCurrentRecord.getInfoValues().get("SB")) : 0.0;
|
2009-11-09 12:20:35 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-19 04:19:34 +08:00
|
|
|
/** @return the VARIANT_TYPE of the current variant */
|
|
|
|
|
public VARIANT_TYPE getType() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getType();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* are we a SNP? If not we're a Indel/deletion
|
|
|
|
|
*
|
|
|
|
|
* @return true if we're a SNP
|
|
|
|
|
*/
|
|
|
|
|
public boolean isSNP() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
2009-10-20 08:55:24 +08:00
|
|
|
assertBiAllelic();
|
2009-12-06 14:48:03 +08:00
|
|
|
return mCurrentRecord.isSNP();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* are we an insertion?
|
|
|
|
|
*
|
|
|
|
|
* @return true if we are, false otherwise
|
|
|
|
|
*/
|
|
|
|
|
public boolean isInsertion() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
2009-10-20 08:55:24 +08:00
|
|
|
assertBiAllelic();
|
2009-12-06 14:48:03 +08:00
|
|
|
return mCurrentRecord.isInsertion();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* are we an insertion?
|
|
|
|
|
*
|
|
|
|
|
* @return true if we are, false otherwise
|
|
|
|
|
*/
|
|
|
|
|
public boolean isDeletion() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
2009-10-20 08:55:24 +08:00
|
|
|
assertBiAllelic();
|
2009-12-06 14:48:03 +08:00
|
|
|
return mCurrentRecord.isDeletion();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public GenomeLoc getLocation() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getLocation();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* get the reference base(s) at this position
|
|
|
|
|
*
|
|
|
|
|
* @return the reference base or bases, as a string
|
|
|
|
|
*/
|
|
|
|
|
public String getReference() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getReference();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** are we bi-allelic? */
|
|
|
|
|
public boolean isBiallelic() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.isBiallelic();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* get the -1 * (log 10 of the error value)
|
|
|
|
|
*
|
|
|
|
|
* @return the log based error estimate
|
|
|
|
|
*/
|
|
|
|
|
public double getNegLog10PError() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getNegLog10PError();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2009-10-23 14:31:15 +08:00
|
|
|
* gets the alternate alleles. This method should return all the alleles present at the location,
|
|
|
|
|
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
|
|
|
|
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
|
|
|
|
* frequency).
|
2009-09-19 04:19:34 +08:00
|
|
|
*
|
2009-10-23 14:31:15 +08:00
|
|
|
* @return an alternate allele list
|
2009-09-19 04:19:34 +08:00
|
|
|
*/
|
2009-10-23 14:31:15 +08:00
|
|
|
public List<String> getAlternateAlleleList() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getAlternateAlleleList();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2009-10-23 14:31:15 +08:00
|
|
|
* gets the alleles. This method should return all the alleles present at the location,
|
|
|
|
|
* including the reference base. The first allele should always be the reference allele, followed
|
|
|
|
|
* by an unordered list of alternate alleles.
|
2009-09-19 04:19:34 +08:00
|
|
|
*
|
2009-10-23 14:31:15 +08:00
|
|
|
* @return an alternate allele list
|
2009-09-19 04:19:34 +08:00
|
|
|
*/
|
2009-10-23 14:31:15 +08:00
|
|
|
public List<String> getAlleleList() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getAlleleList();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
2009-10-23 14:31:15 +08:00
|
|
|
* are we truely a variant, given a reference
|
2009-09-19 04:19:34 +08:00
|
|
|
*
|
2009-10-23 14:31:15 +08:00
|
|
|
* @return false if we're a variant(indel, delete, SNP, etc), true if we're not
|
2009-09-19 04:19:34 +08:00
|
|
|
*/
|
2009-10-23 14:31:15 +08:00
|
|
|
public boolean isReference() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.isReference();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* are we an insertion or a deletion? yes, then return true. No? Well, false then.
|
|
|
|
|
*
|
|
|
|
|
* @return true if we're an insertion or deletion
|
|
|
|
|
*/
|
|
|
|
|
public boolean isIndel() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.isIndel();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
|
|
|
|
* of
|
|
|
|
|
*
|
|
|
|
|
* @return a char, representing the alternate base
|
|
|
|
|
*/
|
|
|
|
|
public char getAlternativeBaseForSNP() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getAlternativeBaseForSNP();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
|
|
|
|
*
|
|
|
|
|
* @return a char, representing the alternate base
|
|
|
|
|
*/
|
|
|
|
|
public char getReferenceForSNP() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getReferenceForSNP();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-19 06:25:16 +08:00
|
|
|
/**
|
|
|
|
|
* get the genotype
|
|
|
|
|
*
|
|
|
|
|
* @return a map in lexigraphical order of the genotypes
|
|
|
|
|
*/
|
2009-09-19 06:38:51 +08:00
|
|
|
public Genotype getCalledGenotype() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getCalledGenotype();
|
2009-09-19 06:25:16 +08:00
|
|
|
}
|
|
|
|
|
|
2009-09-19 04:19:34 +08:00
|
|
|
/**
|
|
|
|
|
* get the genotypes
|
|
|
|
|
*
|
|
|
|
|
* @return a list of the genotypes
|
|
|
|
|
*/
|
|
|
|
|
public List<Genotype> getGenotypes() {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getGenotypes();
|
2009-11-15 17:28:25 +08:00
|
|
|
}
|
|
|
|
|
|
2009-12-06 19:43:40 +08:00
|
|
|
/**
|
|
|
|
|
* get the genotypes
|
|
|
|
|
*
|
|
|
|
|
* @return a list of the genotypes
|
|
|
|
|
*/
|
|
|
|
|
public List<VCFGenotypeRecord> getVCFGenotypeRecords() {
|
|
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.getVCFGenotypeRecords();
|
|
|
|
|
}
|
|
|
|
|
|
2009-09-19 04:19:34 +08:00
|
|
|
/**
|
|
|
|
|
* do we have the specified genotype? not all backedByGenotypes
|
|
|
|
|
* have all the genotype data.
|
|
|
|
|
*
|
|
|
|
|
* @param x the genotype
|
|
|
|
|
*
|
|
|
|
|
* @return true if available, false otherwise
|
|
|
|
|
*/
|
|
|
|
|
public boolean hasGenotype(DiploidGenotype x) {
|
2009-12-06 14:48:03 +08:00
|
|
|
assertNotNull();
|
|
|
|
|
return mCurrentRecord.hasGenotype(x);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public VCFHeader getHeader() {
|
|
|
|
|
return mReader.getHeader();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public boolean hasNext() {
|
2009-12-06 14:48:03 +08:00
|
|
|
return mReader.hasNext();
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public RodVCF next() {
|
|
|
|
|
mCurrentRecord = mReader.next();
|
2009-12-06 14:48:03 +08:00
|
|
|
return new RodVCF(name, mCurrentRecord, mReader);
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public void remove() {
|
2009-12-06 14:48:03 +08:00
|
|
|
throw new UnsupportedOperationException("The remove operation is not supported for a VCF rod");
|
2009-09-19 04:19:34 +08:00
|
|
|
}
|
|
|
|
|
}
|