package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
/**
 * An implementation of the ROD (reference-ordered datum) for VCF.
 *
 * <p>An instance plays two roles: it wraps a single {@link VCFRecord} (the "current record") and
 * delegates the variation/genotype accessors to it, and it acts as an iterator over the records
 * supplied by a {@link VCFReader}. Accessors that delegate to the current record throw an
 * {@link UnsupportedOperationException} when no record has been loaded yet.
 *
 * @author aaron
 */
public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, VariantBackedByGenotype, Iterator {

    // our VCF related information
    private VCFReader mReader;

    // NOTE(review): this field is public, so it is kept public for backward compatibility;
    // prefer getRecord() in new code.
    public VCFRecord mCurrentRecord;

    /**
     * Create a ROD with no reader and no current record. A reader is attached by
     * {@link #initialize(File)}.
     *
     * @param name the name of this ROD track
     */
    public RodVCF(String name) {
        super(name);
    }

    /**
     * Create a ROD wrapping the given record and sharing the given reader.
     *
     * @param name          the name of this ROD track
     * @param currentRecord the record this ROD exposes
     * @param reader        the reader the record came from
     */
    public RodVCF(String name, VCFRecord currentRecord, VCFReader reader) {
        super(name);
        mCurrentRecord = currentRecord;
        mReader = reader;
    }

    /** @return the underlying VCF reader, or null if none has been attached */
    public VCFReader getReader() {
        return mReader;
    }

    /** @return the current VCF record, or null if none has been loaded */
    public VCFRecord getRecord() {
        return mCurrentRecord;
    }

    /**
     * Verify that a current record is available.
     *
     * @throws UnsupportedOperationException if the current record is null
     */
    public void assertNotNull() {
        if ( mCurrentRecord == null ) {
            throw new UnsupportedOperationException("The current Record is null");
        }
    }

    /**
     * Fetch the reader, failing with a descriptive message when none has been attached
     * (i.e. the single-argument constructor was used and initialize() was never called).
     *
     * @return the non-null reader
     * @throws IllegalStateException if no reader has been attached
     */
    private VCFReader requireReader() {
        if ( mReader == null ) {
            throw new IllegalStateException("RodVCF has no VCF reader; initialize() has not been called");
        }
        return mReader;
    }

    /**
     * Line-based parsing is not supported by this ROD; records come from the VCF reader.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean parseLine(Object header, String[] parts) throws IOException {
        throw new UnsupportedOperationException("RodVCF does not support the parseLine method");
    }

    /**
     * Attach a reader for the given source file, unless a reader is already present.
     *
     * @param source the VCF file to read
     * @return the header of the reader in use
     * @throws FileNotFoundException declared for the reader construction
     */
    public Object initialize(final File source) throws FileNotFoundException {
        if ( mReader == null ) {
            mReader = new VCFReader(source);
        }
        return mReader.getHeader();
    }

    /**
     * @return the current record encoded against the reader's header; the record's own string form
     *         when no reader is attached; or the empty string when there is no current record
     */
    @Override
    public String toString() {
        if ( mCurrentRecord == null ) {
            return "";
        }
        // a record handed to the three-argument constructor may come without a reader;
        // toString() must never throw, so fall back to the record's own representation
        if ( mReader == null ) {
            return mCurrentRecord.toString();
        }
        return mCurrentRecord.toStringEncoding(mReader.getHeader());
    }

    /**
     * Create a RodVCF, initialized on the given file, to be used as an iterator.
     *
     * @param name the name of the ROD track
     * @param file the VCF file to read
     * @return the initialized ROD
     * @throws StingException if the file cannot be found
     */
    public static RodVCF createIterator(String name, File file) {
        RodVCF vcf = new RodVCF(name);
        try {
            vcf.initialize(file);
        } catch (FileNotFoundException e) {
            // keep the original exception as the cause so the stack trace is not lost
            throw new StingException("Unable to find file " + file, e);
        }
        return vcf;
    }

    /** @return true if the current record's non-reference allele frequency is greater than zero */
    public boolean hasNonRefAlleleFrequency() {
        assertNotNull();
        return mCurrentRecord.getNonRefAlleleFrequency() > 0.0;
    }

    /**
     * get the frequency of this variant
     *
     * @return the non-reference allele frequency reported by the current record
     */
    public double getNonRefAlleleFrequency() {
        assertNotNull();
        return mCurrentRecord.getNonRefAlleleFrequency();
    }

    /** @return true if the current record's info values contain the strand bias key */
    public boolean hasStrandBias() {
        assertNotNull();
        return this.mCurrentRecord.getInfoValues().containsKey(VCFRecord.STRAND_BIAS_KEY);
    }

    /**
     * get the strand bias of this variant
     *
     * @return the strand bias stored in the info values, or 0.0 if the record has none
     */
    public double getStrandBias() {
        return hasStrandBias() ? Double.valueOf(this.mCurrentRecord.getInfoValues().get(VCFRecord.STRAND_BIAS_KEY)) : 0.0;
    }

    /** @return the VARIANT_TYPE of the current variant */
    public VARIANT_TYPE getType() {
        assertNotNull();
        return mCurrentRecord.getType();
    }

    /** @return the ID of the current record */
    public String getID() {
        assertNotNull();
        return mCurrentRecord.getID();
    }

    /**
     * are we a SNP? If not we're a Indel/deletion
     *
     * @return true if we're a SNP
     */
    public boolean isSNP() {
        assertNotNull();
        return mCurrentRecord.isSNP();
    }

    /**
     * are we an insertion?
     *
     * @return true if we are, false otherwise
     */
    public boolean isInsertion() {
        assertNotNull();
        return mCurrentRecord.isInsertion();
    }

    /**
     * are we a deletion?
     *
     * @return true if we are, false otherwise
     */
    public boolean isDeletion() {
        assertNotNull();
        return mCurrentRecord.isDeletion();
    }

    /** @return the genomic location of the current record */
    @Override
    public GenomeLoc getLocation() {
        assertNotNull();
        return mCurrentRecord.getLocation();
    }

    /**
     * get the reference base(s) at this position
     *
     * @return the reference base or bases, as a string
     */
    public String getReference() {
        assertNotNull();
        return mCurrentRecord.getReference();
    }

    /** are we bi-allelic? */
    public boolean isBiallelic() {
        assertNotNull();
        return mCurrentRecord.isBiallelic();
    }

    /**
     * get the -1 * (log 10 of the error value)
     *
     * @return the log based error estimate
     */
    public double getNegLog10PError() {
        assertNotNull();
        return mCurrentRecord.getNegLog10PError();
    }

    /** @return the quality value of the current record */
    public double getQual() {
        assertNotNull();
        return mCurrentRecord.getQual();
    }

    /** @return true if the current record reports an alternate allele */
    public boolean hasAlternateAllele() {
        assertNotNull();
        return mCurrentRecord.hasAlternateAllele();
    }

    /**
     * gets the alternate alleles. This method should return all the alleles present at the location,
     * NOT including the reference base. This is returned as a string list with no guarantee ordering
     * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
     * frequency).
     *
     * @return an alternate allele list
     */
    public List getAlternateAlleleList() {
        assertNotNull();
        return mCurrentRecord.getAlternateAlleleList();
    }

    /**
     * gets the alleles. This method should return all the alleles present at the location,
     * including the reference base. The first allele should always be the reference allele, followed
     * by an unordered list of alternate alleles.
     *
     * @return the allele list, reference allele first
     */
    public List getAlleleList() {
        assertNotNull();
        return mCurrentRecord.getAlleleList();
    }

    /**
     * are we truly a variant, given a reference
     *
     * @return false if we're a variant (indel, delete, SNP, etc), true if we're not
     */
    public boolean isReference() {
        assertNotNull();
        return mCurrentRecord.isReference();
    }

    /**
     * are we an insertion or a deletion? yes, then return true. No? Well, false then.
     *
     * @return true if we're an insertion or deletion
     */
    public boolean isIndel() {
        assertNotNull();
        return mCurrentRecord.isIndel();
    }

    /**
     * gets the alternate base in the case of a SNP. Throws an IllegalStateException if we're not a SNP
     *
     * @return a char, representing the alternate base
     */
    public char getAlternativeBaseForSNP() {
        assertNotNull();
        return mCurrentRecord.getAlternativeBaseForSNP();
    }

    /**
     * gets the reference base in the case of a SNP. Throws an IllegalStateException if we're not a SNP
     *
     * @return a char, representing the reference base
     */
    public char getReferenceForSNP() {
        assertNotNull();
        return mCurrentRecord.getReferenceForSNP();
    }

    /** @return true if the current record has genotype data */
    public boolean hasGenotypeData() {
        assertNotNull();
        return mCurrentRecord.hasGenotypeData();
    }

    /**
     * get the called genotype
     *
     * // todo -- this is flagged by the original author as a deeply unsafe call; review before use
     *
     * @return the called genotype, as reported by the current record
     */
    public Genotype getCalledGenotype() {
        assertNotNull();
        return mCurrentRecord.getCalledGenotype();
    }

    /**
     * get the genotypes
     *
     * @return a list of the genotypes
     */
    public List getGenotypes() {
        assertNotNull();
        return mCurrentRecord.getGenotypes();
    }

    /**
     * get the VCF genotype records
     *
     * @return a list of the VCF genotype records of the current record
     */
    public List getVCFGenotypeRecords() {
        assertNotNull();
        return mCurrentRecord.getVCFGenotypeRecords();
    }

    /**
     * Returns the genotype associated with sample, or null if the genotype is missing
     *
     * @param sampleName the name of the sample genotype to fetch
     * @return the genotype for the sample, as reported by the current record
     */
    public Genotype getGenotype(final String sampleName) {
        // same guard as every other record accessor, instead of a bare NullPointerException
        assertNotNull();
        return mCurrentRecord.getGenotype(sampleName);
    }

    /**
     * do we have the specified genotype? not all backedByGenotypes
     * have all the genotype data.
     *
     * @param x the genotype
     *
     * @return true if available, false otherwise
     */
    public boolean hasGenotype(DiploidGenotype x) {
        assertNotNull();
        return mCurrentRecord.hasGenotype(x);
    }

    /** @return the sample names of the current record */
    public String[] getSampleNames() {
        assertNotNull();
        return mCurrentRecord.getSampleNames();
    }

    /** @return the info values of the current record */
    public Map getInfoValues() {
        assertNotNull();
        return mCurrentRecord.getInfoValues();
    }

    /** @return the filtering codes of the current record */
    public String[] getFilteringCodes() {
        assertNotNull();
        return mCurrentRecord.getFilteringCodes();
    }

    /** @return true if the current record reports itself as filtered */
    public boolean isFiltered() {
        assertNotNull();
        return mCurrentRecord.isFiltered();
    }

    /** @return the filter string of the current record */
    public String getFilterString() {
        assertNotNull();
        return mCurrentRecord.getFilterString();
    }

    /**
     * @return the header of the underlying reader
     * @throws IllegalStateException if no reader has been attached
     */
    public VCFHeader getHeader() {
        return requireReader().getHeader();
    }

    /**
     * @return true if the underlying reader has more records
     * @throws IllegalStateException if no reader has been attached
     */
    public boolean hasNext() {
        return requireReader().hasNext();
    }

    /**
     * Advance to the next record of the reader. The record becomes this object's current record and
     * is also returned wrapped in a new RodVCF that shares this object's name and reader.
     *
     * @return the next element in the iteration.
     * @throws NoSuchElementException - iterator has no more elements.
     * @throws StingException if the next record's location sorts before the current record's location
     */
    public RodVCF next() {
        if ( !this.hasNext() ) {
            throw new NoSuchElementException("RodVCF next called on iterator with no more elements");
        }

        // get the next record
        VCFRecord rec = mReader.next();

        // make sure the next VCF record isn't before the current record (we'll accept at the same location, the
        // spec doesn't indicate, and it seems like a valid use case)
        GenomeLoc curPosition = null;
        if ( mCurrentRecord != null ) {
            curPosition = mCurrentRecord.getLocation();
        }
        if ( curPosition != null && rec != null && curPosition.compareTo(rec.getLocation()) > 0 ) {
            throw new StingException("The next VCF record appears to be before the current (current location => " + curPosition.toString() +
                    ", the next record position => " + rec.getLocation().toString() + " with line : " + rec.toStringEncoding(mReader.getHeader()) + "). " +
                    "Check to make sure the input VCF file is correctly sorted.");
        }

        // save off the previous record. This is needed given how iterators are used in the ROD system;
        // we need to save off the last record
        mCurrentRecord = rec;
        return new RodVCF(name, rec, mReader);
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException("The remove operation is not supported for a VCF rod");
    }
}