VCF moved over to tribble.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3302 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ad11201235
commit
a68f3b2e9c
|
|
@ -29,10 +29,10 @@ import java.io.*;
|
|||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.io.storage;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Set;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.io.stubs;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
||||
import java.io.File;
|
||||
|
|
|
|||
|
|
@ -1,351 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class RodVCF
|
||||
* <p/>
|
||||
* An implementation of the ROD for VCF.
|
||||
*/
|
||||
public class RodVCF extends BasicReferenceOrderedDatum implements Iterator<RodVCF> {
|
||||
public VCFReader getReader() {
|
||||
return mReader;
|
||||
}
|
||||
|
||||
// our VCF related information
|
||||
private VCFReader mReader;
|
||||
|
||||
public VCFRecord getRecord() {
|
||||
return mCurrentRecord;
|
||||
}
|
||||
|
||||
public VCFRecord mCurrentRecord;
|
||||
|
||||
public RodVCF(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public RodVCF(String name, VCFRecord currentRecord, VCFReader reader) {
|
||||
super(name);
|
||||
mCurrentRecord = currentRecord;
|
||||
mReader = reader;
|
||||
}
|
||||
|
||||
public void assertNotNull() {
|
||||
if ( mCurrentRecord == null ) {
|
||||
throw new UnsupportedOperationException("The current Record is null");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean parseLine(Object header, String[] parts) throws IOException {
|
||||
throw new UnsupportedOperationException("RodVCF does not support the parseLine method");
|
||||
}
|
||||
|
||||
public Object initialize(final File source) throws FileNotFoundException {
|
||||
if ( mReader == null ) {
|
||||
mReader = new VCFReader(source);
|
||||
}
|
||||
return mReader.getHeader();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return (mCurrentRecord != null ? mCurrentRecord.toStringEncoding(mReader.getHeader()) : "");
|
||||
}
|
||||
|
||||
public static RodVCF createIterator(String name, File file) {
|
||||
RodVCF vcf = new RodVCF(name);
|
||||
try {
|
||||
vcf.initialize(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to find file " + file);
|
||||
}
|
||||
return vcf;
|
||||
}
|
||||
|
||||
public boolean hasNonRefAlleleFrequency() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getNonRefAlleleFrequency() > 0.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the frequency of this variant
|
||||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
public double getNonRefAlleleFrequency() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getNonRefAlleleFrequency();
|
||||
}
|
||||
|
||||
public String getID() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getID();
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a SNP? If not we're a Indel/deletion
|
||||
*
|
||||
* @return true if we're a SNP
|
||||
*/
|
||||
public boolean isSNP() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.isSNP();
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a novel site? Is there a DBSNP identifier
|
||||
* or a hapmap entry for the site?
|
||||
*
|
||||
* @return true if this site is novel
|
||||
*/
|
||||
public boolean isNovel() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.isNovel();
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion?
|
||||
*
|
||||
* @return true if we are, false otherwise
|
||||
*/
|
||||
public boolean isInsertion() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.isInsertion();
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion?
|
||||
*
|
||||
* @return true if we are, false otherwise
|
||||
*/
|
||||
public boolean isDeletion() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.isDeletion();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getLocation();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base(s) at this position
|
||||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
public String getReference() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getReference();
|
||||
}
|
||||
|
||||
/** are we bi-allelic?
|
||||
* @return true if we're bi-allelic
|
||||
*/
|
||||
public boolean isBiallelic() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.isBiallelic();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getNegLog10PError();
|
||||
}
|
||||
|
||||
public double getQual() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getQual();
|
||||
}
|
||||
|
||||
public boolean hasAlternateAllele() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.hasAlternateAllele();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlternateAlleleList() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getAlternateAlleleList();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public List<String> getAlleleList() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getAlleleList();
|
||||
}
|
||||
|
||||
/**
|
||||
* are we truely a variant, given a reference
|
||||
*
|
||||
* @return false if we're a variant(indel, delete, SNP, etc), true if we're not
|
||||
*/
|
||||
public boolean isReference() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.isReference();
|
||||
}
|
||||
|
||||
/**
|
||||
* are we an insertion or a deletion? yes, then return true. No? Well, false then.
|
||||
*
|
||||
* @return true if we're an insertion or deletion
|
||||
*/
|
||||
public boolean isIndel() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.isIndel();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
* of
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getAlternativeBaseForSNP() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getAlternativeBaseForSNP();
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP
|
||||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
public char getReferenceForSNP() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getReferenceForSNP();
|
||||
}
|
||||
|
||||
public boolean hasGenotypeData() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.hasGenotypeData();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes
|
||||
*
|
||||
* @return a list of the genotypes
|
||||
*/
|
||||
public List<VCFGenotypeRecord> getVCFGenotypeRecords() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getVCFGenotypeRecords();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the genotype associated with sample, or null if the genotype is missing
|
||||
*
|
||||
* @param sampleName the name of the sample genotype to fetch
|
||||
* @return genotype record
|
||||
*/
|
||||
public VCFGenotypeRecord getGenotype(final String sampleName) {
|
||||
return mCurrentRecord.getGenotype(sampleName);
|
||||
}
|
||||
|
||||
public String[] getSampleNames() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getSampleNames();
|
||||
}
|
||||
|
||||
// public Map<String, Genotype> getSampleGenotypes() {
|
||||
// String[] samples = getSampleNames();
|
||||
// List<Genotype> genotypes = getGenotypes();
|
||||
// HashMap<String, Genotype> map = new HashMap<String, Genotype>();
|
||||
//
|
||||
// for ( int i = 0; i < samples.length; i++ ) {
|
||||
// map.put(samples[i], genotypes.get(i));
|
||||
// }
|
||||
//
|
||||
// return map;
|
||||
// }
|
||||
|
||||
public Map<String, String> getInfoValues() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getInfoValues();
|
||||
}
|
||||
|
||||
public String[] getFilteringCodes() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getFilteringCodes();
|
||||
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.isFiltered();
|
||||
}
|
||||
|
||||
// public boolean hasFilteringCodes() {
|
||||
// assertNotNull();
|
||||
// return mCurrentRecord.hasFilteringCodes();
|
||||
// }
|
||||
|
||||
public String getFilterString() {
|
||||
assertNotNull();
|
||||
return mCurrentRecord.getFilterString();
|
||||
}
|
||||
|
||||
public VCFHeader getHeader() {
|
||||
return mReader.getHeader();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return mReader.hasNext();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the next element in the iteration.
|
||||
* @throws NoSuchElementException - iterator has no more elements.
|
||||
*/
|
||||
public RodVCF next() {
|
||||
if (!this.hasNext()) throw new NoSuchElementException("RodVCF next called on iterator with no more elements");
|
||||
|
||||
// get the next record
|
||||
VCFRecord rec = mReader.next();
|
||||
|
||||
// make sure the next VCF record isn't before the current record (we'll accept at the same location, the
|
||||
// spec doesn't indicate, and it seems like a valid use case)
|
||||
GenomeLoc curPosition = null;
|
||||
if (mCurrentRecord != null) curPosition = mCurrentRecord.getLocation();
|
||||
if (curPosition != null && rec != null && curPosition.compareTo(rec.getLocation()) > 0)
|
||||
throw new StingException("The next VCF record appears to be before the current (current location => " + curPosition.toString() +
|
||||
", the next record position => " + rec.getLocation().toString() + " with line : " + rec.toStringEncoding(mReader.getHeader()) + "). " +
|
||||
"Check to make sure the input VCF file is correctly sorted.");
|
||||
|
||||
// save off the previous record. This is needed given how iterators are used in the ROD system;
|
||||
// we need to save off the last record
|
||||
mCurrentRecord = rec;
|
||||
return new RodVCF(name, rec, mReader);
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("The remove operation is not supported for a VCF rod");
|
||||
}
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ import edu.mit.broad.picard.genotype.DiploidGenotype;
|
|||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
|
||||
|
|
@ -16,7 +17,6 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
|||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -46,7 +46,6 @@ public class VariantContextAdaptors {
|
|||
|
||||
static {
|
||||
adaptors.put(DbSNPFeature.class, new DBSnpAdaptor());
|
||||
adaptors.put(RodVCF.class, new RodVCFAdaptor());
|
||||
adaptors.put(VCFRecord.class, new VCFRecordAdaptor());
|
||||
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
|
||||
adaptors.put(HapMapROD.class, new HapMapAdaptor());
|
||||
|
|
@ -132,23 +131,6 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// VCF to VariantContext and back again
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private static class RodVCFAdaptor extends VCAdaptor {
|
||||
// WARNING: do not use this method if you have anything other than point mutations in your VCF
|
||||
VariantContext convert(String name, Object input) {
|
||||
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), null);
|
||||
}
|
||||
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), ref);
|
||||
}
|
||||
}
|
||||
|
||||
private static class VCFRecordAdaptor extends VCAdaptor {
|
||||
// WARNING: do not use this method if you have anything other than point mutations in your VCF
|
||||
VariantContext convert(String name, Object input) {
|
||||
|
|
@ -229,7 +211,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError();
|
||||
|
||||
GenomeLoc loc = vcf.getLocation();
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(vcf.getChr(),vcf.getStart());
|
||||
if ( vcf.isDeletion() )
|
||||
loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1);
|
||||
|
||||
|
|
|
|||
|
|
@ -66,7 +66,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
|
|||
Types.put("HapMap", HapMapROD.class);
|
||||
Types.put("Intervals", IntervalRod.class);
|
||||
Types.put("GLF", RodGLF.class);
|
||||
Types.put("VCF", RodVCF.class);
|
||||
Types.put("PicardDbSNP", rodPicardDbSNP.class);
|
||||
Types.put("HapmapVCF", HapmapVCFROD.class);
|
||||
Types.put("Beagle", BeagleROD.class);
|
||||
|
|
|
|||
|
|
@ -67,8 +67,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
||||
Map<String, Class> classes = new HashMap<String, Class>();
|
||||
for (String c : this.pluginsByName.keySet())
|
||||
// we're excluding VCF right now
|
||||
if (!c.contains("VCF")) classes.put(c,this.pluginsByName.get(c));
|
||||
classes.put(c,this.pluginsByName.get(c));
|
||||
return classes;
|
||||
}
|
||||
|
||||
|
|
@ -88,15 +87,13 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
// make a feature reader
|
||||
FeatureReader reader;
|
||||
try {
|
||||
FeatureCodec codec = this.createByType(targetClass);
|
||||
|
||||
// check to see if the input file has an index
|
||||
if (!(new File(inputFile.getAbsolutePath() + linearIndexExtension).canRead())) {
|
||||
LinearIndex index = createIndex(inputFile, codec);
|
||||
reader = new FeatureReader(inputFile,index, codec);
|
||||
LinearIndex index = createIndex(inputFile, this.createByType(targetClass));
|
||||
reader = new FeatureReader(inputFile,index, this.createByType(targetClass));
|
||||
}
|
||||
else {
|
||||
reader = new FeatureReader(inputFile,codec);
|
||||
reader = new FeatureReader(inputFile,this.createByType(targetClass));
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to create reader with file " + inputFile, e);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
|
@ -95,8 +99,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
throw new IllegalStateException("VCF record was created, but no rod data is present");
|
||||
|
||||
Object rod = rods.get(0);
|
||||
if ( rod instanceof RodVCF )
|
||||
samples.addAll(Arrays.asList(((RodVCF)rod).getSampleNames()));
|
||||
if ( rod instanceof VCFRecord )
|
||||
samples.addAll(Arrays.asList(((VCFRecord)rod).getSampleNames()));
|
||||
else if ( rod instanceof HapMapROD )
|
||||
samples.addAll(Arrays.asList(((HapMapROD)rod).getSampleIDs()));
|
||||
else
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.TabularROD;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
|
|||
|
|
@ -25,13 +25,13 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -36,7 +37,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.*;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
|
@ -7,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
|
@ -7,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
|
@ -8,7 +9,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
|
|||
|
|
@ -1,15 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
|
@ -9,8 +11,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnota
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.ArrayList;
|
||||
|
|
|
|||
|
|
@ -25,11 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
|
@ -7,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
|
|
|||
|
|
@ -25,12 +25,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType;
|
||||
|
|
@ -41,8 +43,6 @@ import org.broadinstitute.sting.utils.classloader.PackageUtils;
|
|||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
|
|
@ -184,7 +184,7 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
if ( variant instanceof RodVCF ) {
|
||||
if ( variant instanceof VCFRecord) {
|
||||
for(VariantContext annotatedVC : annotatedVCs ) {
|
||||
vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
|
|
@ -41,9 +44,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
|
|||
import org.broadinstitute.sting.playground.gatk.walkers.annotator.GenomicAnnotation;
|
||||
import org.broadinstitute.sting.utils.classloader.PackageUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||
|
||||
import org.broad.tribble.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFFormatHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,10 +25,13 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.classloader.PackageUtils;
|
||||
|
|
@ -157,12 +160,14 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
|
|||
return 0;
|
||||
|
||||
// get all of the vcf rods at this locus
|
||||
ArrayList<RodVCF> vcfRods = new ArrayList<RodVCF>();
|
||||
Map<VCFRecord,String> vcfRods = new LinkedHashMap<VCFRecord,String>();
|
||||
Iterator<GATKFeature> rods = rodData.getAllRods().iterator();
|
||||
while (rods.hasNext()) {
|
||||
GATKFeature rod = rods.next();
|
||||
if ( rod.getUnderlyingObject() instanceof RodVCF )
|
||||
vcfRods.add((RodVCF)rod.getUnderlyingObject());
|
||||
if ( rod.getUnderlyingObject() instanceof VCFRecord ) {
|
||||
if (vcfRods.containsKey(rod)) throw new StingException("Duplicate VCF's found");
|
||||
vcfRods.put((VCFRecord)rod.getUnderlyingObject(),rod.getName());
|
||||
}
|
||||
}
|
||||
|
||||
if ( vcfRods.size() == 0 )
|
||||
|
|
@ -171,12 +176,12 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
|
|||
// pull out all of the individual calls from the rods and insert into a map based on the
|
||||
// mapping from rod/sample to uniquified name
|
||||
HashMap<String, VCFGenotypeRecord> samplesToRecords = new HashMap<String, VCFGenotypeRecord>();
|
||||
for ( RodVCF rod : vcfRods ) {
|
||||
for ( VCFRecord rod : vcfRods.keySet() ) {
|
||||
List<VCFGenotypeRecord> records = rod.getVCFGenotypeRecords();
|
||||
for ( VCFGenotypeRecord vcfRec : records ) {
|
||||
String uniquifiedSample = rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), vcfRec.getSampleName()));
|
||||
String uniquifiedSample = rodNamesToSampleNames.get(new Pair<String, String>(vcfRods.get(rod), vcfRec.getSampleName()));
|
||||
if ( uniquifiedSample == null )
|
||||
throw new StingException("Unexpected sample encountered: " + vcfRec.getSampleName() + " in rod " + rod.getName());
|
||||
throw new StingException("Unexpected sample encountered: " + vcfRec.getSampleName() + " in rod " + vcfRods.get(rod));
|
||||
|
||||
samplesToRecords.put(uniquifiedSample, vcfRec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -103,5 +103,5 @@ public class IndelSubsets implements ConcordanceType {
|
|||
}
|
||||
|
||||
public String getInfoName() { return "IndelSubsets"; }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "Indel-related subsets"); }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "Indel-related subsets"); }
|
||||
}
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Map.Entry;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -116,5 +116,5 @@ public class SNPGenotypeConcordance implements ConcordanceType {
|
|||
}
|
||||
|
||||
public String getInfoName() { return "SnpConcordance"; }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "SNP concordance test"); }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "SNP concordance test"); }
|
||||
}
|
||||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.concordance;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -64,5 +64,5 @@ public class SimpleVenn implements ConcordanceType {
|
|||
}
|
||||
|
||||
public String getInfoName() { return "Venn"; }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "2-way Venn split"); }
|
||||
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "2-way Venn split"); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.apache.commons.jexl.Expression;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,11 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,10 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
|
|
@ -38,6 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.refdata;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.refdata.BasicReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -23,14 +23,15 @@ public class HapmapVCFROD extends BasicReferenceOrderedDatum implements Iterator
|
|||
// This is a (hopefully temporary) wrapper class for certain VCF files that we want to protect from
|
||||
// utilities that grab genotypes or sample names across all VCF files
|
||||
|
||||
private RodVCF rod;
|
||||
private VCFRecord record;
|
||||
private VCFReader reader;
|
||||
|
||||
public VCFReader getReader() {
|
||||
return rod.getReader();
|
||||
return reader;
|
||||
}
|
||||
|
||||
public VCFRecord getRecord() {
|
||||
return rod.getRecord();
|
||||
return record;
|
||||
}
|
||||
|
||||
public HapmapVCFROD(String name) {
|
||||
|
|
@ -39,85 +40,77 @@ public class HapmapVCFROD extends BasicReferenceOrderedDatum implements Iterator
|
|||
|
||||
public HapmapVCFROD(String name, VCFRecord currentRecord, VCFReader reader) {
|
||||
super(name);
|
||||
rod = new RodVCF(name,currentRecord,reader);
|
||||
}
|
||||
|
||||
public HapmapVCFROD(String name, RodVCF rod) {
|
||||
super(name);
|
||||
this.rod = rod;
|
||||
this.record = currentRecord;
|
||||
this.reader = reader;
|
||||
}
|
||||
|
||||
public Object initialize(final File source) throws FileNotFoundException {
|
||||
rod = new RodVCF(name);
|
||||
rod.initialize(source);
|
||||
return rod.getHeader();
|
||||
reader = new VCFReader(source);
|
||||
if (reader.hasNext()) record = reader.next();
|
||||
return reader.getHeader();
|
||||
}
|
||||
|
||||
public boolean parseLine(Object obj, String[] args) {
|
||||
try {
|
||||
return rod.parseLine(obj,args);
|
||||
} catch (Exception e) {
|
||||
throw new UnsupportedOperationException("Parse line not supported",e);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public double getNegLog10PError() {
|
||||
return rod.getNegLog10PError();
|
||||
return record.getNegLog10PError();
|
||||
}
|
||||
public String getReference() {
|
||||
return rod.getReference();
|
||||
return record.getReference();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return rod.toString();
|
||||
return record.toString();
|
||||
}
|
||||
|
||||
public List<String> getAlternateAlleleList() {
|
||||
return rod.getAlternateAlleleList();
|
||||
return record.getAlternateAlleleList();
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return rod.isDeletion();
|
||||
return record.isDeletion();
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return rod.getLocation();
|
||||
return GenomeLocParser.createGenomeLoc(record.getChr(),record.getStart());
|
||||
}
|
||||
|
||||
public boolean isBiallelic() {
|
||||
return rod.isBiallelic();
|
||||
return record.isBiallelic();
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return rod.isIndel();
|
||||
return record.isIndel();
|
||||
}
|
||||
|
||||
public boolean isSNP() {
|
||||
return rod.isSNP();
|
||||
return record.isSNP();
|
||||
}
|
||||
|
||||
public boolean isReference() {
|
||||
return rod.isReference();
|
||||
return record.isReference();
|
||||
}
|
||||
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return rod.getNonRefAlleleFrequency();
|
||||
return record.getNonRefAlleleFrequency();
|
||||
}
|
||||
|
||||
public char getAlternativeBaseForSNP() {
|
||||
return rod.getAlternativeBaseForSNP();
|
||||
return record.getAlternativeBaseForSNP();
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return rod.isInsertion();
|
||||
return record.isInsertion();
|
||||
}
|
||||
|
||||
public List<String> getAlleleList() {
|
||||
return rod.getAlleleList();
|
||||
return record.getAlleleList();
|
||||
}
|
||||
|
||||
public char getReferenceForSNP() {
|
||||
return rod.getReferenceForSNP();
|
||||
return record.getReferenceForSNP();
|
||||
}
|
||||
|
||||
public boolean hasGenotype(DiploidGenotype g) {
|
||||
|
|
@ -126,29 +119,29 @@ public class HapmapVCFROD extends BasicReferenceOrderedDatum implements Iterator
|
|||
}
|
||||
|
||||
public VCFHeader getHeader() {
|
||||
return rod.getHeader();
|
||||
return record.getHeader();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return rod.hasNext();
|
||||
return reader.hasNext();
|
||||
}
|
||||
|
||||
public HapmapVCFROD next() {
|
||||
return new HapmapVCFROD(name,rod.next());
|
||||
return new HapmapVCFROD(name, record, reader);
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
rod.remove();
|
||||
throw new UnsupportedOperationException("Unable to remove");
|
||||
}
|
||||
|
||||
public static HapmapVCFROD createIterator(String name, File file) {
|
||||
RodVCF vcf = new RodVCF(name);
|
||||
HapmapVCFROD vcf = new HapmapVCFROD(name);
|
||||
try {
|
||||
vcf.initialize(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to find file " + file);
|
||||
throw new StingException("Unable to load file",e);
|
||||
}
|
||||
return new HapmapVCFROD(name,vcf);
|
||||
return vcf;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -22,7 +22,7 @@ import java.util.*;
|
|||
* Time: 3:25:11 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="variants",type= RodVCF.class)})
|
||||
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="variants",type= VCFCodec.class)})
|
||||
public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>, Map<String,Set<Double>>> {
|
||||
|
||||
|
||||
|
|
@ -47,12 +47,11 @@ public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>
|
|||
}
|
||||
|
||||
public Map<String,Double> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
RodVCF vcfRod = tracker.lookup("variants",RodVCF.class);
|
||||
VCFRecord record = tracker.lookup("variants", VCFRecord.class);
|
||||
|
||||
if ( vcfRod == null ) {
|
||||
if ( record == null ) {
|
||||
return null;
|
||||
}
|
||||
VCFRecord record = vcfRod.getRecord();
|
||||
|
||||
return getAlleleBalanceBySample(record,ref,context);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,15 +1,16 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
|
@ -93,12 +94,12 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{
|
|||
|
||||
Object variant = rods.get(0);
|
||||
|
||||
if ( variant instanceof RodVCF ) {
|
||||
if ( variant instanceof VCFRecord) {
|
||||
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus()));
|
||||
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
|
||||
annotationString = annotationString.split("\\s+")[0];
|
||||
((RodVCF) variant).getRecord().addInfoField("type",annotationString);
|
||||
vcfWriter.addRecord(((RodVCF) variant).getRecord());
|
||||
((VCFRecord) variant).addInfoField("type",annotationString);
|
||||
vcfWriter.addRecord((VCFRecord) variant);
|
||||
} else {
|
||||
throw new StingException("This one-off walker only deals with VCF files.");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,12 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
|
@ -13,8 +14,6 @@ import org.broadinstitute.sting.gatk.walkers.Window;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,11 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -42,14 +41,13 @@ public class VCFReferenceFixerWalker extends RodWalker<VCFRecord,Long> {
|
|||
}
|
||||
List<Object> rods = tracker.getReferenceMetaData("fixme");
|
||||
Object rod = rods.get(0);
|
||||
RodVCF vcfrod = null;
|
||||
if ( rod instanceof RodVCF ) {
|
||||
vcfrod = (RodVCF) rod;
|
||||
VCFRecord vcfrod = null;
|
||||
if ( rod instanceof VCFRecord ) {
|
||||
vcfrod = (VCFRecord) rod;
|
||||
}
|
||||
|
||||
VCFRecord rec = vcfrod.getRecord();
|
||||
rec.setReferenceBase(new String(BaseUtils.charSeq2byteSeq(context.getBases())));
|
||||
return rec;
|
||||
if (vcfrod != null) vcfrod.setReferenceBase(new String(BaseUtils.charSeq2byteSeq(context.getBases())));
|
||||
return vcfrod;
|
||||
|
||||
/*
|
||||
VariantContext vcon = null;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,9 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -25,11 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
|
|||
|
|
@ -25,12 +25,12 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
|
|
@ -76,9 +77,9 @@ class LocusConcordanceInfo {
|
|||
|
||||
public GenomeLoc getLoc() {
|
||||
if ( concordanceType == ConcordanceType.TRUTH_SET || concordanceType == ConcordanceType.BOTH_SETS || concordanceType == ConcordanceType.TRUTH_SET_VARIANT_FILTERED) {
|
||||
return truthVCFRecord.getLocation();
|
||||
return GenomeLocParser.createGenomeLoc(truthVCFRecord.getChr(),truthVCFRecord.getStart());
|
||||
} else {
|
||||
return variantVCFRecord.getLocation();
|
||||
return GenomeLocParser.createGenomeLoc( variantVCFRecord.getChr(),variantVCFRecord.getStart());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,10 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
|
||||
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
|
@ -41,7 +42,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
* a VCF binding with the name 'variants'.
|
||||
* @Author: Chris Hartl
|
||||
*/
|
||||
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)})
|
||||
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= VCFCodec.class),@RMD(name="variants",type= VCFRecord.class)})
|
||||
public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > {
|
||||
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
|
||||
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
|
||||
|
|
@ -65,14 +66,14 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
|||
if ( tracker == null ) {
|
||||
return null;
|
||||
}
|
||||
RodVCF variantData = tracker.lookup("variants",RodVCF.class);
|
||||
VCFRecord variantData = tracker.lookup("variants", VCFRecord.class);
|
||||
if ( ignoreKnownSites ) { // ignoreKnownSites && tracker.lookup("variants",null) != null && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) )
|
||||
if ( variantData != null && ! variantData.isNovel() ) {
|
||||
//logger.info("Not novel: "+( (RodVCF) tracker.lookup("variants",null)).getID());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
RodVCF truthData = tracker.lookup("truth",RodVCF.class);
|
||||
VCFRecord truthData = tracker.lookup("truth",VCFRecord.class);
|
||||
LocusConcordanceInfo concordance;
|
||||
|
||||
if ( truthData == null && variantData == null) {
|
||||
|
|
@ -82,33 +83,33 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
|||
} else if ( truthData == null ) {
|
||||
|
||||
// not in the truth set
|
||||
if ( ( (RodVCF) variantData ).isFiltered() ) {
|
||||
if ( variantData.isFiltered() ) {
|
||||
|
||||
concordance = null;
|
||||
|
||||
} else {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref);
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null,variantData,ref);
|
||||
}
|
||||
|
||||
} else if ( variantData == null ) {
|
||||
|
||||
// not in the variant set
|
||||
if ( ( (RodVCF) truthData).isFiltered() ) {
|
||||
if ( (truthData).isFiltered() ) {
|
||||
|
||||
concordance = null;
|
||||
|
||||
} else {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,( (RodVCF) truthData).getRecord(),null,ref);
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,truthData,null,ref);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
// in both
|
||||
// check for filtering
|
||||
boolean truth_filter = ((RodVCF) truthData).isFiltered();
|
||||
boolean call_filter = ((RodVCF) variantData).isFiltered();
|
||||
boolean truth_filter = truthData.isFiltered();
|
||||
boolean call_filter = variantData.isFiltered();
|
||||
|
||||
if ( truth_filter && call_filter ) {
|
||||
|
||||
|
|
@ -116,15 +117,15 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
|
|||
|
||||
} else if ( truth_filter ) {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref);
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null,variantData,ref);
|
||||
|
||||
} else if ( call_filter ) {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,( (RodVCF) truthData).getRecord(), null ,ref);
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,truthData, null ,ref);
|
||||
|
||||
} else {
|
||||
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,( (RodVCF) truthData).getRecord(),( (RodVCF) variantData).getRecord(),ref);
|
||||
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,truthData,variantData,ref);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,12 +1,7 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers.vcftools;
|
||||
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
|
|
@ -77,7 +78,7 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
|
|||
|
||||
if ( vc != null ) {
|
||||
if ( ! headerWritten ) {
|
||||
RodVCF vcfrod = tracker.lookup(TRACK_NAME,RodVCF.class);
|
||||
VCFRecord vcfrod = tracker.lookup(TRACK_NAME,VCFRecord.class);
|
||||
writer.writeHeader(vcfrod.getHeader());
|
||||
headerWritten = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,10 +25,13 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers.vcftools;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
|
@ -70,8 +73,8 @@ public class SimpleVCFIntersectWalker extends RodWalker<VCFRecordMerger,Long>{
|
|||
return null;
|
||||
}
|
||||
|
||||
RodVCF priorityCall = tracker.lookup("priority",RodVCF.class);
|
||||
RodVCF otherCall = tracker.lookup("other",RodVCF.class);
|
||||
VCFRecord priorityCall = tracker.lookup("priority",VCFRecord.class);
|
||||
VCFRecord otherCall = tracker.lookup("other",VCFRecord.class);
|
||||
|
||||
if ( priorityCall == null && otherCall == null ) {
|
||||
return null;
|
||||
|
|
@ -82,9 +85,9 @@ public class SimpleVCFIntersectWalker extends RodWalker<VCFRecordMerger,Long>{
|
|||
if ( ! union ) {
|
||||
return null;
|
||||
} else if ( merger.hasBeenInstantiated() ) {
|
||||
merger.update(null,otherCall.getRecord());
|
||||
merger.update(null,otherCall);
|
||||
} else {
|
||||
merger.hold(otherCall.getRecord());
|
||||
merger.hold(otherCall);
|
||||
}
|
||||
|
||||
} else if ( otherCall == null ) {
|
||||
|
|
@ -92,17 +95,17 @@ public class SimpleVCFIntersectWalker extends RodWalker<VCFRecordMerger,Long>{
|
|||
if ( ! union ) {
|
||||
return null;
|
||||
} else if ( merger.hasBeenInstantiated() ) {
|
||||
merger.update(priorityCall.getRecord(),null);
|
||||
merger.update(priorityCall,null);
|
||||
} else {
|
||||
merger.hold(priorityCall.getRecord());
|
||||
merger.hold(priorityCall);
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
if ( merger.hasBeenInstantiated() ) {
|
||||
merger.update(priorityCall.getRecord(), otherCall.getRecord());
|
||||
merger.update(priorityCall, otherCall);
|
||||
} else {
|
||||
merger = instantiateMerger(priorityCall.getRecord(),otherCall.getRecord());
|
||||
merger = instantiateMerger(priorityCall,otherCall);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -255,8 +258,8 @@ class VCFRecordMerger {
|
|||
|
||||
private HashMap<VCFHeader.HEADER_FIELDS,String> getFields(VCFRecord record1, VCFRecord record2) {
|
||||
HashMap<VCFHeader.HEADER_FIELDS,String> fields = new HashMap<VCFHeader.HEADER_FIELDS,String>();
|
||||
fields.put(VCFHeader.HEADER_FIELDS.CHROM, record1.getLocation().getContig());
|
||||
fields.put(VCFHeader.HEADER_FIELDS.POS, String.format("%d",record1.getLocation().getStart()));
|
||||
fields.put(VCFHeader.HEADER_FIELDS.CHROM, record1.getChr());
|
||||
fields.put(VCFHeader.HEADER_FIELDS.POS, String.format("%d",record1.getStart()));
|
||||
fields.put(VCFHeader.HEADER_FIELDS.ID, record1.getID());
|
||||
fields.put(VCFHeader.HEADER_FIELDS.ALT, listAsString(record1.getAlternateAlleleList()));
|
||||
fields.put(VCFHeader.HEADER_FIELDS.REF, record1.getReference());
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
|
|
@ -42,7 +43,6 @@ import org.broadinstitute.sting.commandline.ArgumentCollection;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator;
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
|
|
@ -21,7 +22,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
|
||||
/**
|
||||
* This plugin for {@link VariantAnnotatorEngine} serves as the core
|
||||
|
|
@ -73,8 +73,8 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
|
|||
final Map<String, Object> annotations = new HashMap<String, Object>();
|
||||
for(final GATKFeature gatkFeature : tracker.getAllRods())
|
||||
{
|
||||
final ReferenceOrderedDatum rod = (ReferenceOrderedDatum) gatkFeature.getUnderlyingObject();
|
||||
final String name = rod.getName();
|
||||
final Object rod = gatkFeature.getUnderlyingObject();
|
||||
final String name = gatkFeature.getName();
|
||||
if( name.equals("variant") || name.equals("interval") ) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,13 +35,15 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
|
|
@ -50,8 +52,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
|
|
@ -183,7 +183,7 @@ public class GenomicAnnotator extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
if ( variant instanceof RodVCF ) { //TODO is this if-statement necessary?
|
||||
if ( variant instanceof VCFRecord) { //TODO is this if-statement necessary?
|
||||
for(VariantContext annotatedVC : annotatedVCs ) {
|
||||
vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,11 +25,12 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics;
|
||||
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -40,7 +41,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
* Computes the density of SNPs passing and failing filters in intervals on the genome and emits a table for display
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
@Requires(value={},referenceMetaData=@RMD(name="eval",type=RodVCF.class))
|
||||
@Requires(value={},referenceMetaData=@RMD(name="eval",type= VCFCodec.class))
|
||||
public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDensity.Counter> {
|
||||
@Argument(fullName="granularity", shortName="granularity", doc="", required=false)
|
||||
private int granularity = 1000000;
|
||||
|
|
@ -64,7 +65,7 @@ public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDe
|
|||
public Pair<VariantContext, GenomeLoc> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
VariantContext vc = null;
|
||||
|
||||
RodVCF vcf = tracker.lookup("eval",RodVCF.class);
|
||||
VCFRecord vcf = tracker.lookup("eval",VCFRecord.class);
|
||||
if (vcf != null)
|
||||
vc = VariantContextAdaptors.toVariantContext("eval", vcf);
|
||||
return new Pair<VariantContext, GenomeLoc>(vc, context.getLocation());
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
|
||||
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.IOException;
|
||||
|
|
|
|||
|
|
@ -25,19 +25,21 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
|
||||
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
|
@ -128,7 +130,7 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
|
|||
List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getType().equals(RodVCF.class) ) {
|
||||
if ( rod.getType().equals(VCFCodec.class) ) {
|
||||
VCFReader reader = new VCFReader(rod.getFile());
|
||||
Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
|
||||
samples.addAll(vcfSamples);
|
||||
|
|
|
|||
|
|
@ -25,12 +25,12 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
|
||||
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -83,11 +83,11 @@ public class VariantConcordanceROCCurveWalker extends RodWalker<ExpandingArrayLi
|
|||
|
||||
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
|
||||
if( rod != null && !rod.getName().toUpperCase().startsWith("TRUTH") ) {
|
||||
if( rod.getReferenceOrderedData().getIterator().hasNext() && rod.getReferenceOrderedData().getIterator().next().getUnderlyingObject() instanceof RodVCF ) {
|
||||
if( rod.getReferenceOrderedData().getIterator().hasNext() && rod.getReferenceOrderedData().getIterator().next().getUnderlyingObject() instanceof VCFRecord ) {
|
||||
inputRodNames.add(rod.getName());
|
||||
System.out.println("Adding " + rod.getName() + " to input RodVCF list.");
|
||||
if( sampleName == null && !multiSampleCalls ) {
|
||||
final String[] samples = ((RodVCF)rod.getReferenceOrderedData().getIterator().next().getUnderlyingObject()).getSampleNames();
|
||||
final String[] samples = ((VCFRecord)rod.getReferenceOrderedData().getIterator().next().getUnderlyingObject()).getSampleNames();
|
||||
|
||||
if( samples.length > 1 ) {
|
||||
multiSampleCalls = true;
|
||||
|
|
|
|||
|
|
@ -25,10 +25,12 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
|
|
@ -136,12 +138,13 @@ public class VCFCombine extends RodWalker<VCFRecord, VCFWriter> {
|
|||
return null;
|
||||
|
||||
// get all of the vcf rods at this locus
|
||||
ArrayList<RodVCF> vcfRods = new ArrayList<RodVCF>();
|
||||
Map<VCFRecord, String> vcfRods = new LinkedHashMap<VCFRecord,String>();
|
||||
Iterator<GATKFeature> rods = tracker.getAllRods().iterator();
|
||||
while (rods.hasNext()) {
|
||||
Object rod = rods.next().getUnderlyingObject();
|
||||
if ( rod instanceof RodVCF )
|
||||
vcfRods.add((RodVCF)rod);
|
||||
GATKFeature feat = rods.next();
|
||||
Object rod = feat.getUnderlyingObject();
|
||||
if ( rod instanceof VCFRecord )
|
||||
vcfRods.put((VCFRecord)rod,feat.getName());
|
||||
}
|
||||
|
||||
if ( vcfRods.size() == 0 )
|
||||
|
|
@ -166,18 +169,20 @@ public class VCFCombine extends RodWalker<VCFRecord, VCFWriter> {
|
|||
return vcfWriter;
|
||||
}
|
||||
|
||||
private VCFRecord vcfUnion(ArrayList<RodVCF> rods) {
|
||||
private VCFRecord vcfUnion(Map<VCFRecord, String> rods) {
|
||||
if ( priority == null )
|
||||
return rods.get(0).mCurrentRecord;
|
||||
return rods.keySet().iterator().next();
|
||||
|
||||
if ( annotateUnion ) {
|
||||
Map<String, RodVCF> rodMap = new HashMap<String, RodVCF>();
|
||||
for ( RodVCF vcf : rods ) { rodMap.put(vcf.getName(), vcf); }
|
||||
Map<String, VCFRecord> rodMap = new HashMap<String, VCFRecord>();
|
||||
for ( VCFRecord vcf : rods.keySet() ) {
|
||||
rodMap.put(rods.get(vcf),vcf);
|
||||
}
|
||||
|
||||
String priority1 = priority[0];
|
||||
String priority2 = priority[1];
|
||||
VCFRecord vcf1 = rodMap.containsKey(priority1) ? rodMap.get(priority1).getRecord() : null;
|
||||
VCFRecord vcf2 = rodMap.containsKey(priority2) ? rodMap.get(priority2).getRecord() : null;
|
||||
VCFRecord vcf1 = rodMap.containsKey(priority1) ? rodMap.get(priority1) : null;
|
||||
VCFRecord vcf2 = rodMap.containsKey(priority2) ? rodMap.get(priority2) : null;
|
||||
|
||||
// for simplicity, we are setting set and call for vcf1
|
||||
String set = priority1;
|
||||
|
|
@ -213,9 +218,9 @@ public class VCFCombine extends RodWalker<VCFRecord, VCFWriter> {
|
|||
return call;
|
||||
} else {
|
||||
for ( String rodname : priority ) {
|
||||
for ( RodVCF rod : rods ) {
|
||||
if ( rod.getName().equals(rodname) )
|
||||
return rod.mCurrentRecord;
|
||||
for ( VCFRecord rod : rods.keySet() ) {
|
||||
if ( rods.get(rod).equals(rodname) )
|
||||
return rod;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,10 +29,10 @@ import org.apache.commons.jexl.Expression;
|
|||
import org.apache.commons.jexl.ExpressionFactory;
|
||||
import org.apache.commons.jexl.JexlContext;
|
||||
import org.apache.commons.jexl.JexlHelper;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
|
@ -45,7 +45,7 @@ import java.util.*;
|
|||
/**
|
||||
* Selects variant calls for output from a user-supplied VCF file using a number of user-selectable, parameterizable criteria.
|
||||
*/
|
||||
@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class))
|
||||
@Requires(value={},referenceMetaData=@RMD(name="variant",type= VCFCodec.class))
|
||||
public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
||||
@Argument(fullName="match", shortName="match", doc="Expression used with INFO fields to select VCF records for inclusion in the output VCF(see wiki docs for more info)", required=false)
|
||||
protected String[] MATCH_STRINGS = new String[]{null};
|
||||
|
|
@ -66,7 +66,7 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
private List<MatchExp> matchExpressions = new ArrayList<MatchExp>();
|
||||
|
||||
private void initializeVcfWriter(RodVCF rod) {
|
||||
private void initializeVcfWriter(VCFRecord record) {
|
||||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
|
|
@ -78,7 +78,7 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
writer = new VCFWriter(out);
|
||||
writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()));
|
||||
writer.writeHeader(new VCFHeader(hInfo, record.getHeader().getGenotypeSamples()));
|
||||
}
|
||||
|
||||
public void initialize() {
|
||||
|
|
@ -108,15 +108,15 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
|||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
RodVCF variant = tracker.lookup("variant",RodVCF.class);
|
||||
VCFRecord variant = tracker.lookup("variant",VCFRecord.class);
|
||||
// ignore places where we don't have a variant
|
||||
if ( variant == null )
|
||||
return 0;
|
||||
|
||||
boolean someoneMatched = false;
|
||||
for ( MatchExp exp : matchExpressions ) {
|
||||
Map<String, String> infoMap = new HashMap<String, String>(variant.mCurrentRecord.getInfoValues());
|
||||
infoMap.put("QUAL", String.valueOf(variant.mCurrentRecord.getQual()));
|
||||
Map<String, String> infoMap = new HashMap<String, String>(variant.getInfoValues());
|
||||
infoMap.put("QUAL", String.valueOf(variant.getQual()));
|
||||
|
||||
JexlContext jContext = JexlHelper.createContext();
|
||||
jContext.setVars(infoMap);
|
||||
|
|
@ -139,13 +139,11 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
private void writeVCF(RodVCF variant) {
|
||||
VCFRecord rec = variant.mCurrentRecord;
|
||||
|
||||
private void writeVCF(VCFRecord variant) {
|
||||
if ( writer == null )
|
||||
initializeVcfWriter(variant);
|
||||
|
||||
writer.addRecord(rec);
|
||||
writer.addRecord(variant);
|
||||
}
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
|
|
|
|||
|
|
@ -25,14 +25,15 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
|
||||
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -80,9 +81,8 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
|
|||
if (tracker != null) {
|
||||
for (GATKFeature feature : tracker.getAllRods()) {
|
||||
Object rod = feature.getUnderlyingObject();
|
||||
if (rod instanceof RodVCF) {
|
||||
RodVCF vcfrod = (RodVCF) rod;
|
||||
VCFRecord record = vcfrod.mCurrentRecord;
|
||||
if (rod instanceof VCFRecord) {
|
||||
VCFRecord vcfrod = (VCFRecord) rod;
|
||||
|
||||
if (SAMPLES == null) {
|
||||
SAMPLES = new HashSet<String>();
|
||||
|
|
@ -95,7 +95,7 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
|
|||
|
||||
//out.println(record.toStringEncoding(vcfrod.getHeader()));
|
||||
|
||||
records.add(record);
|
||||
records.add(vcfrod);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -129,8 +129,8 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
|
|||
}
|
||||
|
||||
VCFRecord subset = new VCFRecord(record.getReference(),
|
||||
record.getLocation().getContig(),
|
||||
(int) record.getLocation().getStart(),
|
||||
record.getChr(),
|
||||
record.getStart(),
|
||||
record.getID(),
|
||||
genotypeEncodings,
|
||||
record.getQual(),
|
||||
|
|
|
|||
|
|
@ -27,6 +27,8 @@ package org.broadinstitute.sting.playground.tools.vcf;
|
|||
|
||||
//import org.broadinstitute.sting.playground.tools.vcf.VCFOptimize.Cut;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
|
|
@ -152,7 +154,7 @@ class VCFApplyCuts extends CommandLineProgram
|
|||
|
||||
VCFReader reader = null;
|
||||
|
||||
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
|
||||
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
|
||||
else { reader = new VCFReader(new File(filename)); }
|
||||
|
||||
VCFHeader header = reader.getHeader();
|
||||
|
|
|
|||
|
|
@ -24,15 +24,18 @@
|
|||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.tools.vcf;
|
||||
import org.broad.tribble.vcf.VCFGenotypeEncoding;
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.picard.util.Interval;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
|
||||
|
||||
class VCFCallRates extends CommandLineProgram
|
||||
|
|
@ -62,7 +65,7 @@ class VCFCallRates extends CommandLineProgram
|
|||
|
||||
if (autocorrect)
|
||||
{
|
||||
reader = new VCFReader(VCFHomogenizer.create(filename));
|
||||
reader = new VCFReader(new File(filename),new VCFHomogenizer());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.playground.tools.vcf;
|
||||
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.zip.*;
|
||||
|
||||
|
|
@ -34,93 +36,10 @@ import java.util.zip.*;
|
|||
* @author jmaguire
|
||||
*/
|
||||
|
||||
class VCFHomogenizer extends InputStream
|
||||
{
|
||||
private BufferedReader in = null;
|
||||
private String currentLine;
|
||||
private ByteArrayInputStream stream;
|
||||
|
||||
public VCFHomogenizer(Reader in)
|
||||
{
|
||||
this.in = (BufferedReader)in;
|
||||
currentLine = this.readLine();
|
||||
stream = new ByteArrayInputStream(currentLine.getBytes());
|
||||
}
|
||||
|
||||
////////////////////////////////////////
|
||||
// InputStream methods
|
||||
|
||||
public int available()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
public void close() throws java.io.IOException
|
||||
{
|
||||
this.in.close();
|
||||
}
|
||||
|
||||
public void mark(int readlimit)
|
||||
{
|
||||
System.err.println("not implemented");
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
public void reset()
|
||||
{
|
||||
System.err.println("not implemented");
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
public boolean markSupported()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
public int read()
|
||||
{
|
||||
if ((stream == null) || (stream.available() == 0))
|
||||
{
|
||||
currentLine = this.readLine();
|
||||
if (currentLine == null) { return -1; }
|
||||
stream = new ByteArrayInputStream(currentLine.getBytes());
|
||||
}
|
||||
return stream.read();
|
||||
}
|
||||
|
||||
// END InputStream methods
|
||||
/////////////////////////////////////
|
||||
|
||||
|
||||
public static VCFHomogenizer create(String filename)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (filename.endsWith(".gz"))
|
||||
{
|
||||
return new VCFHomogenizer(new BufferedReader(
|
||||
new InputStreamReader(
|
||||
new GZIPInputStream(
|
||||
new FileInputStream(filename)))));
|
||||
}
|
||||
else
|
||||
{
|
||||
return new VCFHomogenizer(new BufferedReader(
|
||||
new InputStreamReader(
|
||||
new FileInputStream(filename))));
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
class VCFHomogenizer implements VCFCodec.LineTransform {
|
||||
|
||||
//my ($chr, $off, $id, $ref, $alt, $qual, $filter, $info, $format, @genotypes) = @tokens;
|
||||
private String editLine(String input)
|
||||
public String lineTransform(String input)
|
||||
{
|
||||
if (input == null) { return null; }
|
||||
|
||||
|
|
@ -228,47 +147,6 @@ class VCFHomogenizer extends InputStream
|
|||
|
||||
return output;
|
||||
}
|
||||
|
||||
public String readLine()
|
||||
{
|
||||
try
|
||||
{
|
||||
String line = in.readLine();
|
||||
if (line == null) { return null; }
|
||||
else { return editLine(line + "\n"); }
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/*
|
||||
public int read()
|
||||
{
|
||||
throw new RuntimeException("VCFHomogenizer.read() not implemented.");
|
||||
}
|
||||
|
||||
public byte read(byte[] b)
|
||||
{
|
||||
throw new RuntimeException("VCFHomogenizer.read(byte[]) not implemented.");
|
||||
}
|
||||
|
||||
public int read(byte[] b, int off, int len)
|
||||
{
|
||||
throw new RuntimeException("VCFHomogenizer.read(byte[], int, int) not implemented.");
|
||||
}
|
||||
|
||||
public long skip(long n)
|
||||
{
|
||||
throw new RuntimeException("VCFHomogenizer.skip(long) not implemented.");
|
||||
}
|
||||
|
||||
public boolean markSupported() { return false; }
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.tools.vcf;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
|
|
@ -63,8 +65,8 @@ class VCFMerge extends CommandLineProgram
|
|||
|
||||
if (autocorrect)
|
||||
{
|
||||
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
|
||||
reader2 = new VCFReader(VCFHomogenizer.create(filename2));
|
||||
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
|
||||
reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -24,10 +24,11 @@
|
|||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.tools.vcf;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
|
||||
|
||||
import java.io.*;
|
||||
|
|
@ -306,7 +307,7 @@ class VCFOptimize extends CommandLineProgram
|
|||
|
||||
VCFReader reader = null;
|
||||
|
||||
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
|
||||
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
|
||||
else { reader = new VCFReader(new File(filename)); }
|
||||
|
||||
PrintWriter output = null;
|
||||
|
|
|
|||
|
|
@ -24,14 +24,15 @@
|
|||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.tools.vcf;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import net.sf.picard.util.Interval;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
|
||||
|
||||
class VCFSequenomAnalysis extends CommandLineProgram
|
||||
|
|
@ -66,8 +67,8 @@ class VCFSequenomAnalysis extends CommandLineProgram
|
|||
|
||||
if (autocorrect)
|
||||
{
|
||||
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
|
||||
reader2 = new VCFReader(VCFHomogenizer.create(filename2));
|
||||
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
|
||||
reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -24,16 +24,19 @@
|
|||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.tools.vcf;
|
||||
import org.broad.tribble.vcf.VCFGenotypeEncoding;
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
import java.lang.*;
|
||||
|
||||
import net.sf.picard.util.Interval;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
|
||||
|
||||
class VCFSequenomAnalysis2 extends CommandLineProgram
|
||||
|
|
@ -68,8 +71,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
|
|||
|
||||
if (autocorrect)
|
||||
{
|
||||
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
|
||||
reader2 = new VCFReader(VCFHomogenizer.create(filename2));
|
||||
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
|
||||
reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
|||
|
|
@ -24,6 +24,10 @@
|
|||
*/
|
||||
|
||||
package org.broadinstitute.sting.playground.tools.vcf;
|
||||
import org.broad.tribble.vcf.VCFGenotypeEncoding;
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
|
|
@ -65,7 +69,7 @@ class VCFValidate extends CommandLineProgram
|
|||
|
||||
VCFReader reader = null;
|
||||
|
||||
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
|
||||
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
|
||||
else { reader = new VCFReader(new File(filename)); }
|
||||
|
||||
VCFHeader header = reader.getHeader();
|
||||
|
|
@ -123,8 +127,10 @@ class VCFStats extends CommandLineProgram
|
|||
String stop = tokens[2];
|
||||
Interval locus = new Interval(chr, Integer.parseInt(start), Integer.parseInt(stop));
|
||||
|
||||
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); }
|
||||
else { reader = new VCFReader(new File(in_filename)); }
|
||||
if (autocorrect)
|
||||
reader = new VCFReader(new File(in_filename),new VCFHomogenizer());
|
||||
else
|
||||
reader = new VCFReader(new File(in_filename));
|
||||
|
||||
VCFHeader header = reader.getHeader();
|
||||
|
||||
|
|
@ -258,7 +264,7 @@ class CheckRefFields extends CommandLineProgram
|
|||
|
||||
VCFReader reader = null;
|
||||
|
||||
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
|
||||
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
|
||||
else { reader = new VCFReader(new File(filename)); }
|
||||
|
||||
ReferenceSequenceFileWalker ref = new ReferenceSequenceFileWalker(new File(fasta_filename));
|
||||
|
|
@ -274,7 +280,7 @@ class CheckRefFields extends CommandLineProgram
|
|||
{
|
||||
VCFRecord record = reader.next();
|
||||
|
||||
String chr = record.getLocation().getContig();
|
||||
String chr = record.getChr();
|
||||
if (! chr.equals(ref_seq_name))
|
||||
{
|
||||
System.out.println("Loading " + chr);
|
||||
|
|
@ -282,7 +288,7 @@ class CheckRefFields extends CommandLineProgram
|
|||
ref_seq_name = chr;
|
||||
}
|
||||
|
||||
long offset = record.getLocation().getStart();
|
||||
long offset = record.getStart();
|
||||
char vcf_ref_base = record.getReference().charAt(0);
|
||||
char fasta_ref_base = (char)ref_seq[(int)offset-1];
|
||||
|
||||
|
|
@ -319,7 +325,7 @@ class FixRefFields extends CommandLineProgram
|
|||
|
||||
VCFReader reader = null;
|
||||
|
||||
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
|
||||
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
|
||||
else { reader = new VCFReader(new File(filename)); }
|
||||
|
||||
ReferenceSequenceFileWalker ref = new ReferenceSequenceFileWalker(new File(fasta_filename));
|
||||
|
|
@ -349,7 +355,7 @@ class FixRefFields extends CommandLineProgram
|
|||
{
|
||||
VCFRecord record = reader.next();
|
||||
|
||||
String chr = record.getLocation().getContig();
|
||||
String chr = record.getChr();
|
||||
if (! chr.equals(ref_seq_name))
|
||||
{
|
||||
System.out.println("Loading " + chr);
|
||||
|
|
@ -357,7 +363,7 @@ class FixRefFields extends CommandLineProgram
|
|||
ref_seq_name = chr;
|
||||
}
|
||||
|
||||
long offset = record.getLocation().getStart();
|
||||
long offset = record.getStart();
|
||||
char vcf_ref_base = record.getReference().charAt(0);
|
||||
char fasta_ref_base = (char)ref_seq[(int)offset-1];
|
||||
|
||||
|
|
@ -498,7 +504,7 @@ class VCFGrep_old extends CommandLineProgram
|
|||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); }
|
||||
if (autocorrect) { reader = new VCFReader(new File(in_filename),new VCFHomogenizer()); }
|
||||
else { reader = new VCFReader(new File(in_filename)); }
|
||||
|
||||
|
||||
|
|
@ -528,7 +534,7 @@ class PrintGQ extends CommandLineProgram
|
|||
VCFReader reader;
|
||||
VCFReader reader2;
|
||||
|
||||
reader = new VCFReader(VCFHomogenizer.create(filename));
|
||||
reader = new VCFReader(new File(filename),new VCFHomogenizer());
|
||||
|
||||
VCFHeader header = reader.getHeader();
|
||||
VCFRecord record = reader.next();
|
||||
|
|
@ -604,7 +610,7 @@ class VCFSimpleStats extends CommandLineProgram
|
|||
|
||||
if (autocorrect)
|
||||
{
|
||||
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
|
||||
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -819,8 +825,8 @@ class VCFConcordance extends CommandLineProgram
|
|||
|
||||
if (autocorrect)
|
||||
{
|
||||
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
|
||||
reader2 = new VCFReader(VCFHomogenizer.create(filename2));
|
||||
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
|
||||
reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -1266,8 +1272,8 @@ public class VCFTool
|
|||
|
||||
public static Interval getIntervalFromRecord(VCFRecord record)
|
||||
{
|
||||
String chr = record.getLocation().getContig();
|
||||
long off = record.getLocation().getStart();
|
||||
String chr = record.getChr();
|
||||
long off = record.getStart();
|
||||
return new Interval(chr, (int)off, (int)off);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,12 +27,13 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -79,7 +80,7 @@ public class SampleUtils {
|
|||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getType().equals(RodVCF.class) ) {
|
||||
if ( rod.getType().equals(VCFRecord.class) ) {
|
||||
VCFReader reader = new VCFReader(rod.getFile());
|
||||
samples.addAll(reader.getHeader().getGenotypeSamples());
|
||||
reader.close();
|
||||
|
|
@ -108,7 +109,7 @@ public class SampleUtils {
|
|||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getType().equals(RodVCF.class) ) {
|
||||
if ( rod.getType().equals(VCFCodec.class) ) {
|
||||
VCFReader reader = new VCFReader(rod.getFile());
|
||||
Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
|
||||
for ( String sample : vcfSamples )
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.*;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.*;
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.genotype.glf;
|
|||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.util.BinaryCodec;
|
||||
import net.sf.samtools.util.BlockCompressedOutputStream;
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -10,7 +11,6 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,60 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFFilterHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry for FILTER fields in the VCF header
|
||||
*/
|
||||
public class VCFFilterHeaderLine extends VCFHeaderLine {
|
||||
|
||||
private String mName;
|
||||
private String mDescription;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFFilterHeaderLine(String name, String description) {
|
||||
super("FILTER", "");
|
||||
mName = name;
|
||||
mDescription = description;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
*
|
||||
* @param line the header line
|
||||
*/
|
||||
protected VCFFilterHeaderLine(String line) {
|
||||
super("FILTER", "");
|
||||
String[] pieces = line.split(",");
|
||||
if ( pieces.length < 2 )
|
||||
throw new IllegalArgumentException("There are too few values in the VCF FILTER header line: " + line);
|
||||
|
||||
mName = pieces[0];
|
||||
mDescription = Utils.trim(pieces[1], '"');
|
||||
// just in case there were some commas in the description
|
||||
for (int i = 1; i < pieces.length; i++)
|
||||
mDescription += "," + Utils.trim(pieces[i], '"');
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
return String.format("FILTER=%s,\"%s\"", mName, mDescription);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFFilterHeaderLine) )
|
||||
return false;
|
||||
VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
|
||||
return mName.equals(other.mName) &&
|
||||
mDescription.equals(other.mDescription);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFFormatHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry for genotype FORMAT fields in the VCF header
|
||||
*/
|
||||
public class VCFFormatHeaderLine extends VCFHeaderLine {
|
||||
|
||||
// the format field types
|
||||
public enum FORMAT_TYPE {
|
||||
Integer, Float, String
|
||||
}
|
||||
|
||||
private String mName;
|
||||
private int mCount;
|
||||
private String mDescription;
|
||||
private FORMAT_TYPE mType;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param count the count for this header line
|
||||
* @param type the type for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFFormatHeaderLine(String name, int count, FORMAT_TYPE type, String description) {
|
||||
super("FORMAT", "");
|
||||
mName = name;
|
||||
mCount = count;
|
||||
mType = type;
|
||||
mDescription = description;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
*
|
||||
* @param line the header line
|
||||
*/
|
||||
protected VCFFormatHeaderLine(String line) {
|
||||
super("FORMAT", "");
|
||||
String[] pieces = line.split(",");
|
||||
if ( pieces.length < 4 )
|
||||
throw new IllegalArgumentException("There are too few values in the VCF FORMAT header line: " + line);
|
||||
|
||||
mName = pieces[0];
|
||||
mCount = Integer.valueOf(pieces[1]);
|
||||
mType = FORMAT_TYPE.valueOf(pieces[2]);
|
||||
mDescription = Utils.trim(pieces[3], '"');
|
||||
// just in case there were some commas in the description
|
||||
for (int i = 4; i < pieces.length; i++)
|
||||
mDescription += "," + Utils.trim(pieces[i], '"');
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
return String.format("FORMAT=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription);
|
||||
}
|
||||
|
||||
public String getName() { return mName; }
|
||||
public int getCount() { return mCount; }
|
||||
public String getDescription() { return mDescription; }
|
||||
public FORMAT_TYPE getType() { return mType; }
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFFormatHeaderLine) )
|
||||
return false;
|
||||
VCFFormatHeaderLine other = (VCFFormatHeaderLine)o;
|
||||
return mName.equals(other.mName) &&
|
||||
mCount == other.mCount &&
|
||||
mDescription.equals(other.mDescription) &&
|
||||
mType == other.mType;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,127 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFGenotypeEncoding
|
||||
* <p/>
|
||||
* basic encoding class for genotype fields in VCF
|
||||
*/
|
||||
public class VCFGenotypeEncoding {
|
||||
public enum TYPE {
|
||||
SINGLE_BASE,
|
||||
INSERTION,
|
||||
DELETION,
|
||||
UNCALLED,
|
||||
MIXED // this type is only valid in aggregate, not for a single VCFGenotypeEncoding
|
||||
}
|
||||
|
||||
// our length (0 for SINGLE_BASE), our bases, and our type
|
||||
private final int mLength;
|
||||
private final String mBases;
|
||||
private final TYPE mType;
|
||||
|
||||
// public constructor, that parses out the base string
|
||||
public VCFGenotypeEncoding(String baseString) {
|
||||
if ((baseString.length() == 1)) {
|
||||
// are we an empty (no-call) genotype?
|
||||
if (baseString.equals(VCFGenotypeRecord.EMPTY_ALLELE)) {
|
||||
mBases = VCFGenotypeRecord.EMPTY_ALLELE;
|
||||
mLength = 0;
|
||||
mType = TYPE.UNCALLED;
|
||||
} else if (!validBases(baseString)) {
|
||||
throw new IllegalArgumentException("Alleles of length 1 must be one of A,C,G,T, " + baseString + " was passed in");
|
||||
} else { // we're a valid base
|
||||
mBases = baseString.toUpperCase();
|
||||
mLength = 0;
|
||||
mType = TYPE.SINGLE_BASE;
|
||||
}
|
||||
} else { // deletion or insertion
|
||||
if (baseString.length() < 1 || (baseString.toUpperCase().charAt(0) != 'D' && baseString.toUpperCase().charAt(0) != 'I')) {
|
||||
throw new IllegalArgumentException("Genotype encoding of " + baseString + " was passed in, but is not a valid deletion, insertion, base, or no call (.)");
|
||||
}
|
||||
if (baseString.toUpperCase().charAt(0) == 'D') {
|
||||
mLength = Integer.valueOf(baseString.substring(1, baseString.length()));
|
||||
mBases = "";
|
||||
mType = TYPE.DELETION;
|
||||
} else { // we're an I
|
||||
mBases = baseString.substring(1, baseString.length()).toUpperCase();
|
||||
if (!validBases(mBases))
|
||||
throw new IllegalArgumentException("The insertion base string contained invalid bases -> " + baseString);
|
||||
mLength = mBases.length();
|
||||
mType = TYPE.INSERTION;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int getLength() {
|
||||
return mLength;
|
||||
}
|
||||
|
||||
public String getBases() {
|
||||
return mBases;
|
||||
}
|
||||
|
||||
public TYPE getType() {
|
||||
return mType;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
if ( obj == null )
|
||||
return false;
|
||||
if ( obj instanceof VCFGenotypeEncoding ) {
|
||||
VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj;
|
||||
return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength);
|
||||
}
|
||||
if ( mType == TYPE.UNCALLED && obj.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) )
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
// our underlying data is immutable, so this is safe (we won't strand a value in a hashtable somewhere
|
||||
// when the data changes underneath, altering this value).
|
||||
String str = this.mBases + String.valueOf(this.mLength) + this.mType.toString();
|
||||
return str.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* dump the string representation of this genotype encoding
|
||||
*
|
||||
* @return string representation
|
||||
*/
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
switch (mType) {
|
||||
case SINGLE_BASE:
|
||||
case UNCALLED:
|
||||
builder.append(mBases);
|
||||
break;
|
||||
case INSERTION:
|
||||
builder.append("I");
|
||||
builder.append(mBases);
|
||||
break;
|
||||
case DELETION:
|
||||
builder.append("D");
|
||||
builder.append(mLength);
|
||||
break;
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* ensure that string contains valid bases
|
||||
*
|
||||
* @param bases the bases to check
|
||||
*
|
||||
* @return true if they're all either A,C,G,T; false otherwise
|
||||
*/
|
||||
private static boolean validBases(String bases) {
|
||||
for (char c : bases.toUpperCase().toCharArray()) {
|
||||
if (c != 'A' && c != 'C' && c != 'G' && c != 'T')
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,353 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFGenotypeRecord
|
||||
* <p/>
|
||||
*/
|
||||
public class VCFGenotypeRecord {
|
||||
|
||||
// key names
|
||||
public static final String GENOTYPE_KEY = "GT";
|
||||
public static final String GENOTYPE_QUALITY_KEY = "GQ";
|
||||
public static final String DEPTH_KEY = "DP";
|
||||
public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
|
||||
public static final String GENOTYPE_FILTER_KEY = "FT";
|
||||
public static final String GENOTYPE_POSTERIORS_TRIPLET_KEY = "GL";
|
||||
public static final String OLD_DEPTH_KEY = "RD";
|
||||
|
||||
// the values for empty fields
|
||||
public static final String EMPTY_GENOTYPE = "./.";
|
||||
public static final String EMPTY_ALLELE = ".";
|
||||
public static final int MISSING_GENOTYPE_QUALITY = -1;
|
||||
public static final int MISSING_DEPTH = -1;
|
||||
public static final int MISSING_HAPLOTYPE_QUALITY = -1;
|
||||
public static final String PASSES_FILTERS = "0";
|
||||
public static final String UNFILTERED = ".";
|
||||
|
||||
public static final double MAX_QUAL_VALUE = 99.0;
|
||||
|
||||
// what kind of phasing this genotype has
|
||||
public enum PHASE {
|
||||
UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN
|
||||
}
|
||||
|
||||
// our record
|
||||
private VCFRecord mRecord;
|
||||
|
||||
// our phasing
|
||||
private PHASE mPhaseType;
|
||||
|
||||
// our bases(s)
|
||||
private final List<VCFGenotypeEncoding> mGenotypeAlleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
|
||||
// our mapping of the format mFields to values
|
||||
private final Map<String, String> mFields = new HashMap<String, String>();
|
||||
|
||||
// our sample name
|
||||
private String mSampleName;
|
||||
|
||||
/**
|
||||
* Create a VCF genotype record
|
||||
*
|
||||
* @param sampleName sample name
|
||||
* @param genotypes list of genotypes
|
||||
* @param phasing phasing
|
||||
*/
|
||||
public VCFGenotypeRecord(String sampleName, List<VCFGenotypeEncoding> genotypes, PHASE phasing) {
|
||||
mSampleName = sampleName;
|
||||
if (genotypes != null)
|
||||
this.mGenotypeAlleles.addAll(genotypes);
|
||||
mPhaseType = phasing;
|
||||
}
|
||||
|
||||
public void setVCFRecord(VCFRecord record) {
|
||||
mRecord = record;
|
||||
}
|
||||
|
||||
public void setSampleName(String name) {
|
||||
mSampleName = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a field to the genotype record.
|
||||
* Throws an exception if the key is GT, as that's computed internally.
|
||||
*
|
||||
* @param key the field name (use static variables above for common fields)
|
||||
* @param value the field value
|
||||
*/
|
||||
public void setField(String key, String value) {
|
||||
// make sure the GT field isn't being set
|
||||
if ( key.equals(GENOTYPE_KEY) )
|
||||
throw new IllegalArgumentException("Setting the GT field is not allowed as that's done internally");
|
||||
mFields.put(key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* determine the phase of the genotype
|
||||
*
|
||||
* @param phase the string that contains the phase character
|
||||
*
|
||||
* @return the phase
|
||||
*/
|
||||
static PHASE determinePhase(String phase) {
|
||||
// find the phasing information
|
||||
if (phase.equals("/"))
|
||||
return PHASE.UNPHASED;
|
||||
else if (phase.equals("|"))
|
||||
return PHASE.PHASED;
|
||||
else if (phase.equals("\\"))
|
||||
return PHASE.PHASED_SWITCH_PROB;
|
||||
else
|
||||
throw new IllegalArgumentException("Unknown genotype phasing parameter");
|
||||
}
|
||||
|
||||
|
||||
public PHASE getPhaseType() {
|
||||
return mPhaseType;
|
||||
}
|
||||
|
||||
public String getSampleName() {
|
||||
return mSampleName;
|
||||
}
|
||||
|
||||
public List<VCFGenotypeEncoding> getAlleles() {
|
||||
return mGenotypeAlleles;
|
||||
}
|
||||
|
||||
public Map<String, String> getFields() {
|
||||
return mFields;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the phred-scaled quality score
|
||||
*/
|
||||
public double getQual() {
|
||||
return ( mFields.containsKey(GENOTYPE_QUALITY_KEY) ? Double.valueOf(mFields.get(GENOTYPE_QUALITY_KEY)) : MISSING_GENOTYPE_QUALITY);
|
||||
}
|
||||
|
||||
public boolean isMissingQual() {
|
||||
return (int)getQual() == MISSING_GENOTYPE_QUALITY;
|
||||
}
|
||||
|
||||
public double getNegLog10PError() {
|
||||
double qual = getQual();
|
||||
return (qual == MISSING_GENOTYPE_QUALITY ? MISSING_GENOTYPE_QUALITY : qual / 10.0);
|
||||
}
|
||||
|
||||
public int getReadCount() {
|
||||
return ( mFields.containsKey(DEPTH_KEY) ? Integer.valueOf(mFields.get(DEPTH_KEY)) : MISSING_DEPTH);
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return mRecord != null ? mRecord.getLocation() : null;
|
||||
}
|
||||
|
||||
public String getReference() {
|
||||
return mRecord != null ? mRecord.getReference() : "N";
|
||||
}
|
||||
|
||||
public String getBases() {
|
||||
String genotype = "";
|
||||
for ( VCFGenotypeEncoding encoding : mGenotypeAlleles )
|
||||
genotype += encoding.getBases();
|
||||
return genotype;
|
||||
}
|
||||
|
||||
public boolean isVariant(char ref) {
|
||||
for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
|
||||
if ( encoding.getType() == VCFGenotypeEncoding.TYPE.UNCALLED )
|
||||
continue;
|
||||
if ( encoding.getType() != VCFGenotypeEncoding.TYPE.SINGLE_BASE ||
|
||||
encoding.getBases().charAt(0) != ref )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isPointGenotype() {
|
||||
return (mRecord != null ? !mRecord.isIndel() : true);
|
||||
}
|
||||
|
||||
public boolean isHom() {
|
||||
if ( mGenotypeAlleles.size() == 0 )
|
||||
return true;
|
||||
|
||||
String bases = mGenotypeAlleles.get(0).getBases();
|
||||
for ( int i = 1; i < mGenotypeAlleles.size(); i++ ) {
|
||||
if ( !bases.equals(mGenotypeAlleles.get(1).getBases()) )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isHet() {
|
||||
return !isHom();
|
||||
}
|
||||
|
||||
public boolean isNoCall() {
|
||||
for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
|
||||
if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
return ( mFields.get(GENOTYPE_FILTER_KEY) != null &&
|
||||
!mFields.get(GENOTYPE_FILTER_KEY).equals(UNFILTERED) &&
|
||||
!mFields.get(GENOTYPE_FILTER_KEY).equals(PASSES_FILTERS));
|
||||
}
|
||||
|
||||
public int getPloidy() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
public VCFRecord getRecord() {
|
||||
return mRecord;
|
||||
}
|
||||
|
||||
private String toGenotypeString(List<VCFGenotypeEncoding> altAlleles) {
|
||||
String str = "";
|
||||
boolean first = true;
|
||||
for (VCFGenotypeEncoding allele : mGenotypeAlleles) {
|
||||
if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED)
|
||||
str += VCFGenotypeRecord.EMPTY_ALLELE;
|
||||
else
|
||||
str += String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0);
|
||||
if (first) {
|
||||
switch (mPhaseType) {
|
||||
case UNPHASED:
|
||||
str += "/";
|
||||
break;
|
||||
case PHASED:
|
||||
str += "|";
|
||||
break;
|
||||
case PHASED_SWITCH_PROB:
|
||||
str += "\\";
|
||||
break;
|
||||
case UNKNOWN:
|
||||
throw new UnsupportedOperationException("Unknown phase type");
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
return str;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("[VCFGenotype %s %s %s %s]", getLocation(), mSampleName, this.mGenotypeAlleles, mFields);
|
||||
}
|
||||
|
||||
public boolean isEmptyGenotype() {
|
||||
for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
|
||||
if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof VCFGenotypeRecord) {
|
||||
if (((VCFGenotypeRecord) other).mPhaseType != this.mPhaseType) return false;
|
||||
if (!((VCFGenotypeRecord) other).mGenotypeAlleles.equals(this.mGenotypeAlleles)) return false;
|
||||
if (!((VCFGenotypeRecord) other).mFields.equals(mFields)) return false;
|
||||
if (!((VCFGenotypeRecord) other).mSampleName.equals(this.mSampleName)) return false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* output a string representation of the VCFGenotypeRecord, given the alternate alleles
|
||||
*
|
||||
* @param altAlleles the alternate alleles, needed for toGenotypeString()
|
||||
* @param genotypeFormatStrings genotype format strings
|
||||
*
|
||||
* @return a string
|
||||
*/
|
||||
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append(toGenotypeString(altAlleles));
|
||||
|
||||
for ( String field : genotypeFormatStrings ) {
|
||||
if ( field.equals(GENOTYPE_KEY) )
|
||||
continue;
|
||||
|
||||
String value = mFields.get(field);
|
||||
if ( value == null && field.equals(OLD_DEPTH_KEY) )
|
||||
value = mFields.get(DEPTH_KEY);
|
||||
|
||||
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
|
||||
if ( value == null || value.equals("") )
|
||||
builder.append(getMissingFieldValue(field));
|
||||
else
|
||||
builder.append(value);
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* output a string representation of an empty genotype
|
||||
*
|
||||
* @param genotypeFormatStrings genotype format strings
|
||||
*
|
||||
* @return a string
|
||||
*/
|
||||
public static String stringEncodingForEmptyGenotype(String[] genotypeFormatStrings) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append(EMPTY_GENOTYPE);
|
||||
|
||||
for ( String field : genotypeFormatStrings ) {
|
||||
if ( field.equals(GENOTYPE_KEY) )
|
||||
continue;
|
||||
|
||||
builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
|
||||
builder.append(getMissingFieldValue(field));
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static String getMissingFieldValue(String field) {
|
||||
String result = "";
|
||||
if ( field.equals(GENOTYPE_QUALITY_KEY) )
|
||||
result = String.valueOf(MISSING_GENOTYPE_QUALITY);
|
||||
else if ( field.equals(DEPTH_KEY) || field.equals(OLD_DEPTH_KEY) )
|
||||
result = String.valueOf(MISSING_DEPTH);
|
||||
else if ( field.equals(GENOTYPE_FILTER_KEY) )
|
||||
result = UNFILTERED;
|
||||
else if ( field.equals(GENOTYPE_POSTERIORS_TRIPLET_KEY) )
|
||||
result = "0,0,0";
|
||||
// TODO -- support haplotype quality
|
||||
//else if ( field.equals(HAPLOTYPE_QUALITY_KEY) )
|
||||
// result = String.valueOf(MISSING_HAPLOTYPE_QUALITY);
|
||||
return result;
|
||||
}
|
||||
|
||||
public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(GENOTYPE_POSTERIORS_TRIPLET_KEY, 3, VCFFormatHeaderLine.FORMAT_TYPE.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
//result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality"));
|
||||
return result;
|
||||
}
|
||||
|
||||
public void replaceFields(HashMap<String,String> newFields) {
|
||||
mFields.clear();
|
||||
for ( String s : newFields.keySet() ) {
|
||||
mFields.put(s,newFields.get(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
|
||||
import java.util.Set;
|
||||
|
|
@ -24,7 +26,7 @@ public interface VCFGenotypeWriter extends GenotypeWriter {
|
|||
* Add a given VCF record to the given output.
|
||||
* @param vcfRecord Record to add.
|
||||
*/
|
||||
public void addRecord(VCFRecord vcfRecord);
|
||||
public void addRecord(VCFRecord vcfRecord);
|
||||
|
||||
/**
|
||||
* set the validation stringency
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broad.tribble.vcf.VCFFormatHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
|
|
@ -64,7 +68,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
|
|||
// set up the allowed genotype format fields
|
||||
allowedGenotypeFormatStrings = new ArrayList<String>();
|
||||
for ( VCFHeaderLine field : headerInfo ) {
|
||||
if ( field instanceof VCFFormatHeaderLine )
|
||||
if ( field instanceof VCFFormatHeaderLine)
|
||||
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,151 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFHeader
|
||||
* <p/>
|
||||
* A class representing the VCF header
|
||||
*/
|
||||
public class VCFHeader {
|
||||
|
||||
public static final String FILE_FORMAT_KEY = "fileformat";
|
||||
public static final String OLD_FILE_FORMAT_KEY = "format"; // from version 3.2
|
||||
|
||||
|
||||
/** the current vcf version we support. */
|
||||
public static final String VCF_VERSION_HEADER = "VCFv";
|
||||
public static final String OLD_VCF_VERSION_HEADER = "VCRv"; // from version 3.2
|
||||
public static final double VCF_VERSION_NUMBER = 3.3;
|
||||
public static final String VCF_VERSION = VCF_VERSION_HEADER + VCF_VERSION_NUMBER;
|
||||
|
||||
|
||||
// the manditory header fields
|
||||
public enum HEADER_FIELDS {
|
||||
CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
|
||||
}
|
||||
|
||||
// the associated meta data
|
||||
private final Set<VCFHeaderLine> mMetaData;
|
||||
|
||||
// the list of auxillary tags
|
||||
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
|
||||
|
||||
// the character string that indicates meta data
|
||||
public static final String METADATA_INDICATOR = "##";
|
||||
|
||||
// the header string indicator
|
||||
public static final String HEADER_INDICATOR = "#";
|
||||
|
||||
|
||||
/** do we have genotying data? */
|
||||
private boolean hasGenotypingData = false;
|
||||
|
||||
/**
|
||||
* create a VCF header, given a list of meta data and auxillary tags
|
||||
*
|
||||
* @param metaData the meta data associated with this header
|
||||
*/
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData) {
|
||||
mMetaData = new TreeSet<VCFHeaderLine>(metaData);
|
||||
checkVCFVersion();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF header, given a list of meta data and auxillary tags
|
||||
*
|
||||
* @param metaData the meta data associated with this header
|
||||
* @param genotypeSampleNames the genotype format field, and the sample names
|
||||
*/
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
|
||||
mMetaData = new TreeSet<VCFHeaderLine>(metaData);
|
||||
for (String col : genotypeSampleNames) {
|
||||
if (!col.equals("FORMAT"))
|
||||
mGenotypeSampleNames.add(col);
|
||||
}
|
||||
if (genotypeSampleNames.size() > 0) hasGenotypingData = true;
|
||||
checkVCFVersion();
|
||||
}
|
||||
|
||||
/**
|
||||
* check our metadata for a VCF version tag, and throw an exception if the version is out of date
|
||||
* or the version is not present
|
||||
*/
|
||||
public void checkVCFVersion() {
|
||||
String version = null;
|
||||
for ( VCFHeaderLine line : mMetaData ) {
|
||||
if ( line.getKey().equals(FILE_FORMAT_KEY) || line.getKey().equals(OLD_FILE_FORMAT_KEY) ) {
|
||||
version = line.getValue();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( version == null )
|
||||
mMetaData.add(new VCFHeaderLine(FILE_FORMAT_KEY, VCF_VERSION));
|
||||
else if ( !isSupportedVersion(version) )
|
||||
throw new RuntimeException("VCF version " + version +
|
||||
" is not yet supported; only version " + VCF_VERSION + " and earlier can be used");
|
||||
}
|
||||
|
||||
private boolean isSupportedVersion(String version) {
|
||||
if ( !version.startsWith(VCF_VERSION_HEADER) && !version.startsWith(OLD_VCF_VERSION_HEADER) )
|
||||
return false;
|
||||
try {
|
||||
double dVersion = Double.valueOf(version.substring(VCF_VERSION_HEADER.length()));
|
||||
return dVersion <= VCF_VERSION_NUMBER;
|
||||
} catch (Exception e) { }
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the header fields in order they're presented in the input file (which is now required to be
|
||||
* the order presented in the spec).
|
||||
*
|
||||
* @return a set of the header fields, in order
|
||||
*/
|
||||
public Set<HEADER_FIELDS> getHeaderFields() {
|
||||
Set<HEADER_FIELDS> fields = new LinkedHashSet<HEADER_FIELDS>();
|
||||
for (HEADER_FIELDS field : HEADER_FIELDS.values())
|
||||
fields.add(field);
|
||||
return fields;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the meta data, associated with this header
|
||||
*
|
||||
* @return a set of the meta data
|
||||
*/
|
||||
public Set<VCFHeaderLine> getMetaData() {
|
||||
return mMetaData;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotyping sample names
|
||||
*
|
||||
* @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false
|
||||
*/
|
||||
public Set<String> getGenotypeSamples() {
|
||||
return mGenotypeSampleNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* do we have genotyping data?
|
||||
*
|
||||
* @return true if we have genotyping columns, false otherwise
|
||||
*/
|
||||
public boolean hasGenotypingData() {
|
||||
return hasGenotypingData;
|
||||
}
|
||||
|
||||
/** @return the column count, */
|
||||
public int getColumnCount() {
|
||||
return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,86 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry in the VCF header
|
||||
*/
|
||||
public class VCFHeaderLine implements Comparable {
|
||||
|
||||
private String stringRep = null;
|
||||
private String mKey = null;
|
||||
private String mValue = null;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF header line
|
||||
*
|
||||
* @param key the key for this header line
|
||||
* @param value the value for this header line
|
||||
*/
|
||||
public VCFHeaderLine(String key, String value) {
|
||||
mKey = key;
|
||||
mValue = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the key
|
||||
*
|
||||
* @return the key
|
||||
*/
|
||||
public String getKey() {
|
||||
return mKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the key
|
||||
*
|
||||
* @param key the key for this header line
|
||||
*/
|
||||
public void setKey(String key) {
|
||||
mKey = key;
|
||||
stringRep = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the value
|
||||
*
|
||||
* @return the value
|
||||
*/
|
||||
public String getValue() {
|
||||
return mValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value
|
||||
*
|
||||
* @param value the value for this header line
|
||||
*/
|
||||
public void setValue(String value) {
|
||||
mValue = value;
|
||||
stringRep = null;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
if ( stringRep == null )
|
||||
stringRep = makeStringRep();
|
||||
return stringRep;
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
return mKey + "=" + mValue;
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFHeaderLine) )
|
||||
return false;
|
||||
return mKey.equals(((VCFHeaderLine)o).getKey()) && mValue.equals(((VCFHeaderLine)o).getValue());
|
||||
}
|
||||
|
||||
public int compareTo(Object other) {
|
||||
return toString().compareTo(other.toString());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFInfoHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry for INFO fields in the VCF header
|
||||
*/
|
||||
public class VCFInfoHeaderLine extends VCFHeaderLine {
|
||||
|
||||
// the info field types
|
||||
public enum INFO_TYPE {
|
||||
Integer, Float, String, Character, Flag
|
||||
}
|
||||
|
||||
private String mName;
|
||||
private int mCount;
|
||||
private String mDescription;
|
||||
private INFO_TYPE mType;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param count the count for this header line
|
||||
* @param type the type for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFInfoHeaderLine(String name, int count, INFO_TYPE type, String description) {
|
||||
super("INFO", "");
|
||||
mName = name;
|
||||
mCount = count;
|
||||
mType = type;
|
||||
mDescription = description;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
*
|
||||
* @param line the header line
|
||||
*/
|
||||
protected VCFInfoHeaderLine(String line) {
|
||||
super("INFO", "");
|
||||
String[] pieces = line.split(",");
|
||||
if ( pieces.length < 4 )
|
||||
throw new IllegalArgumentException("There are too few values in the VCF INFO header line: " + line);
|
||||
|
||||
mName = pieces[0];
|
||||
mCount = Integer.valueOf(pieces[1]);
|
||||
mType = INFO_TYPE.valueOf(pieces[2]);
|
||||
mDescription = Utils.trim(pieces[3], '"');
|
||||
// just in case there were some commas in the description
|
||||
for (int i = 4; i < pieces.length; i++)
|
||||
mDescription += "," + Utils.trim(pieces[i], '"');
|
||||
}
|
||||
|
||||
protected String makeStringRep() {
|
||||
return String.format("INFO=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFInfoHeaderLine) )
|
||||
return false;
|
||||
VCFInfoHeaderLine other = (VCFInfoHeaderLine)o;
|
||||
return mName.equals(other.mName) &&
|
||||
mCount == other.mCount &&
|
||||
mDescription.equals(other.mDescription) &&
|
||||
mType == other.mType;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,8 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broad.tribble.vcf.VCFGenotypeEncoding;
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,9 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/** The VCFReader class, which given a valid vcf file, parses out the header and VCF records */
|
||||
|
|
@ -28,14 +31,11 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
// our VCF header
|
||||
private VCFHeader mHeader;
|
||||
|
||||
// our buffered input stream
|
||||
private BufferedReader mReader;
|
||||
// our iterator
|
||||
private Iterator<VCFRecord> iterator;
|
||||
|
||||
// our next record
|
||||
private VCFRecord mNextRecord = null;
|
||||
|
||||
// our pattern matching for the genotype mFields
|
||||
private static final Pattern gtPattern = Pattern.compile("([0-9\\.]+)([\\\\|\\/])([0-9\\.]*)");
|
||||
// our feature reader; so we can close it
|
||||
private FeatureReader<VCFRecord> vcfReader = null;
|
||||
|
||||
/**
|
||||
* Create a VCF reader, given a VCF file
|
||||
|
|
@ -43,89 +43,36 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
* @param vcfFile the vcf file to write
|
||||
*/
|
||||
public VCFReader(File vcfFile) {
|
||||
if (vcfFile.getName().endsWith(".gz"))
|
||||
openGZipFile(vcfFile);
|
||||
else
|
||||
openTextVersion(vcfFile);
|
||||
|
||||
this.parseHeader();
|
||||
initialize(vcfFile, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a VCF reader, given a stream.
|
||||
* Create a VCF reader, given a VCF file
|
||||
*
|
||||
* @param stream the stream file read
|
||||
* @param vcfFile the vcf file to write
|
||||
*/
|
||||
public VCFReader (InputStream stream)
|
||||
{
|
||||
mReader = new BufferedReader(
|
||||
new InputStreamReader(
|
||||
stream,
|
||||
Charset.forName("UTF-8")));
|
||||
this.parseHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* parse the header from the top of the file. We read lines that match
|
||||
* the header pattern, and try to create a header. We then create the first
|
||||
* record from the first non-header line in the file.
|
||||
*/
|
||||
private void parseHeader() {
|
||||
String line = null;
|
||||
// try and parse the header
|
||||
try {
|
||||
ArrayList<String> lines = new ArrayList<String>();
|
||||
line = mReader.readLine();
|
||||
while (line != null && line.startsWith("#")) {
|
||||
lines.add(line);
|
||||
line = mReader.readLine();
|
||||
}
|
||||
// try to make a header from the lines we parsed
|
||||
mHeader = this.createHeader(lines);
|
||||
|
||||
// if the last line we parsed is null, we shouldn't try to make a record of it
|
||||
if (line != null) mNextRecord = createRecord(line, mHeader);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("VCFReader: Failed to parse VCF File on line: " + line, e);
|
||||
}
|
||||
public VCFReader(File vcfFile, VCFCodec.LineTransform transform) {
|
||||
initialize(vcfFile, transform);
|
||||
}
|
||||
|
||||
/**
|
||||
* open a g-zipped version of the VCF format
|
||||
*
|
||||
* @param vcfGZipFile the file to open
|
||||
*/
|
||||
private void openGZipFile(File vcfGZipFile) {
|
||||
private void initialize(File vcfFile, VCFCodec.LineTransform transform) {
|
||||
VCFCodec codec = new VCFCodec();
|
||||
if (transform != null) codec.setTransformer(transform);
|
||||
try {
|
||||
mReader = new BufferedReader(
|
||||
new InputStreamReader(new GZIPInputStream(
|
||||
new FileInputStream(vcfGZipFile))));
|
||||
vcfReader = new FeatureReader(vcfFile,codec);
|
||||
iterator= vcfReader.iterator();
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new RuntimeException("VCFReader: Unable to find VCF file: " + vcfGZipFile, e);
|
||||
throw new StingException("Unable to read VCF File from " + vcfFile, e);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("VCFReader: A problem occured trying to open the file using the gzipped decompressor, filename: " + vcfGZipFile, e);
|
||||
throw new StingException("Unable to read VCF File from " + vcfFile, e);
|
||||
}
|
||||
mHeader = codec.getHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* open the vcf file as a text file
|
||||
*
|
||||
* @param vcfFile the vcf file name
|
||||
*/
|
||||
private void openTextVersion(File vcfFile) {
|
||||
try {
|
||||
mReader = new BufferedReader(
|
||||
new InputStreamReader(
|
||||
new FileInputStream(vcfFile),
|
||||
Charset.forName("UTF-8")));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new RuntimeException("VCFReader: Unable to find VCF text file: " + vcfFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
/** @return true if we have another VCF record to return */
|
||||
public boolean hasNext() {
|
||||
return (mNextRecord != null);
|
||||
return (iterator.hasNext());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -134,15 +81,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
* @return a VCFRecord, representing the next record in the file
|
||||
*/
|
||||
public VCFRecord next() {
|
||||
VCFRecord rec = mNextRecord;
|
||||
try {
|
||||
String line = mReader.readLine();
|
||||
if (line == null) mNextRecord = null;
|
||||
else mNextRecord = createRecord(line, mHeader);
|
||||
} catch (IOException e) {
|
||||
mNextRecord = null;
|
||||
}
|
||||
return rec;
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
/** Remove is not supported */
|
||||
|
|
@ -150,196 +89,6 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
throw new UnsupportedOperationException("Unsupported operation");
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF header, given an array of strings that all start with at least the # character. This function is
|
||||
* package protected so that the VCFReader can access this function
|
||||
*
|
||||
* @param headerStrings a list of header strings
|
||||
*
|
||||
* @return a VCF Header created from the list of stinrgs
|
||||
*/
|
||||
protected VCFHeader createHeader(List<String> headerStrings) {
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
Set<String> auxTags = new LinkedHashSet<String>();
|
||||
// iterate over all the passed in strings
|
||||
for ( String str : headerStrings ) {
|
||||
if ( !str.startsWith("##") ) {
|
||||
String[] strings = str.substring(1).split("\t");
|
||||
// the columns should be in order according to Richard Durbin
|
||||
int arrayIndex = 0;
|
||||
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
|
||||
try {
|
||||
if (field != VCFHeader.HEADER_FIELDS.valueOf(strings[arrayIndex]))
|
||||
throw new RuntimeException("VCFReader: we were expecting column name " + field + " but we saw " + strings[arrayIndex]);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new RuntimeException("VCFReader: Unknown column name \"" + strings[arrayIndex] + "\", it does not match a known column header name.");
|
||||
}
|
||||
arrayIndex++;
|
||||
}
|
||||
while (arrayIndex < strings.length) {
|
||||
if (!strings[arrayIndex].equals("FORMAT"))
|
||||
auxTags.add(strings[arrayIndex]);
|
||||
arrayIndex++;
|
||||
}
|
||||
} else {
|
||||
if ( str.startsWith("##INFO=") )
|
||||
metaData.add(new VCFInfoHeaderLine(str.substring(7)));
|
||||
else if ( str.startsWith("##FILTER=") )
|
||||
metaData.add(new VCFFilterHeaderLine(str.substring(9)));
|
||||
else if ( str.startsWith("##FORMAT=") )
|
||||
metaData.add(new VCFFormatHeaderLine(str.substring(9)));
|
||||
else {
|
||||
int equals = str.indexOf("=");
|
||||
if ( equals != -1 )
|
||||
metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new VCFHeader(metaData, auxTags);
|
||||
}
|
||||
|
||||
/**
|
||||
* create the next VCFRecord, given the input line
|
||||
*
|
||||
* @param line the line from the file
|
||||
* @param mHeader the VCF header
|
||||
* @return the VCFRecord
|
||||
*/
|
||||
public static VCFRecord createRecord(String line, VCFHeader mHeader) {
|
||||
try {
|
||||
// things we need to make a VCF record
|
||||
Map<VCFHeader.HEADER_FIELDS, String> values = new HashMap<VCFHeader.HEADER_FIELDS, String>();
|
||||
String tokens[] = line.split("\t");
|
||||
|
||||
// check to ensure that the column count of tokens is right
|
||||
if (tokens.length != mHeader.getColumnCount()) {
|
||||
throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);
|
||||
}
|
||||
|
||||
int index = 0;
|
||||
for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields())
|
||||
values.put(field, tokens[index++]);
|
||||
// if we have genotyping data, we try and extract the genotype fields
|
||||
if (mHeader.hasGenotypingData()) {
|
||||
String mFormatString = tokens[index];
|
||||
String keyStrings[] = mFormatString.split(":");
|
||||
List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
|
||||
index++;
|
||||
String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
|
||||
for (String str : mHeader.getGenotypeSamples()) {
|
||||
genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
|
||||
index++;
|
||||
}
|
||||
VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);
|
||||
// associate the genotypes with this new record
|
||||
for (VCFGenotypeRecord gr : genotypeRecords)
|
||||
gr.setVCFRecord(vrec);
|
||||
return vrec;
|
||||
|
||||
}
|
||||
return new VCFRecord(values);
|
||||
} catch (Exception e) {
|
||||
throw new VCFParseException("VCF Reader failed to parsing, on line = " + line, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* generate a VCF genotype record, given it's format string, the genotype string, and allele info
|
||||
*
|
||||
* @param sampleName the sample name
|
||||
* @param formatString the format string for this record, which contains the keys for the genotype parameters
|
||||
* @param genotypeString contains the phasing information, allele information, and values for genotype parameters
|
||||
* @param altAlleles the alternate allele string array, which we index into based on the field parameters
|
||||
* @param referenceBase the reference base
|
||||
*
|
||||
* @return a VCFGenotypeRecord
|
||||
*/
|
||||
public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) {
|
||||
return getVCFGenotype(sampleName, formatString.split(":"), genotypeString, altAlleles, referenceBase);
|
||||
}
|
||||
|
||||
/**
|
||||
* generate a VCF genotype record, given it's format string, the genotype string, and allele info
|
||||
*
|
||||
* @param sampleName the sample name
|
||||
* @param keyStrings the split format string for this record, which contains the keys for the genotype parameters
|
||||
* @param genotypeString contains the phasing information, allele information, and values for genotype parameters
|
||||
* @param altAlleles the alternate allele string array, which we index into based on the field parameters
|
||||
* @param referenceBase the reference base
|
||||
*
|
||||
* @return a VCFGenotypeRecord
|
||||
*/
|
||||
public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) {
|
||||
// parameters to create the VCF genotype record
|
||||
HashMap<String, String> tagToValue = new HashMap<String, String>();
|
||||
VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN;
|
||||
List<VCFGenotypeEncoding> bases = new ArrayList<VCFGenotypeEncoding>();
|
||||
|
||||
for (String key : keyStrings) {
|
||||
String parse;
|
||||
int nextDivider;
|
||||
if (!genotypeString.contains(":")) {
|
||||
nextDivider = genotypeString.length();
|
||||
parse = genotypeString;
|
||||
} else {
|
||||
nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":");
|
||||
parse = genotypeString.substring(0, nextDivider);
|
||||
}
|
||||
if (key.equals(VCFGenotypeRecord.GENOTYPE_KEY)) {
|
||||
Matcher m = gtPattern.matcher(parse);
|
||||
if (!m.matches())
|
||||
throw new RuntimeException("VCFReader: Unable to match GT genotype flag to it's expected pattern, the field was: " + parse);
|
||||
phase = VCFGenotypeRecord.determinePhase(m.group(2));
|
||||
addAllele(m.group(1), altAlleles, referenceBase, bases);
|
||||
if (m.group(3).length() > 0) addAllele(m.group(3), altAlleles, referenceBase, bases);
|
||||
} else {
|
||||
if ( parse.length() == 0 )
|
||||
parse = VCFGenotypeRecord.getMissingFieldValue(key);
|
||||
tagToValue.put(key, parse);
|
||||
}
|
||||
if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1;
|
||||
genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length());
|
||||
}
|
||||
if ( bases.size() > 0 && bases.get(0).equals(VCFGenotypeRecord.EMPTY_ALLELE) )
|
||||
tagToValue.clear();
|
||||
// catch some common errors, either there are too many field keys or there are two many field values
|
||||
else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0))
|
||||
throw new RuntimeException("VCFReader: genotype value count doesn't match the key count (expected "
|
||||
+ keyStrings.length + " but saw " + tagToValue.size() + ")");
|
||||
else if ( genotypeString.length() > 0 )
|
||||
throw new RuntimeException("VCFReader: genotype string contained additional unprocessed fields: " + genotypeString
|
||||
+ ". This most likely means that the format string is shorter then the value fields.");
|
||||
|
||||
VCFGenotypeRecord rec = new VCFGenotypeRecord(sampleName, bases, phase);
|
||||
for ( Map.Entry<String, String> entry : tagToValue.entrySet() )
|
||||
rec.setField(entry.getKey(), entry.getValue());
|
||||
return rec;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* add an alternate allele to the list of alleles we have for a VCF genotype record
|
||||
*
|
||||
* @param alleleNumber the allele number, as a string
|
||||
* @param altAlleles the list of alternate alleles
|
||||
* @param referenceBase the reference base
|
||||
* @param bases the list of bases for this genotype call
|
||||
*/
|
||||
private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<VCFGenotypeEncoding> bases) {
|
||||
if (alleleNumber.equals(VCFGenotypeRecord.EMPTY_ALLELE)) {
|
||||
bases.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE));
|
||||
} else {
|
||||
int alleleValue = Integer.valueOf(alleleNumber);
|
||||
// check to make sure the allele value is within bounds
|
||||
if (alleleValue < 0 || alleleValue > altAlleles.length)
|
||||
throw new IllegalArgumentException("VCFReader: the allele value of " + alleleValue + " is out of bounds given the alternate allele list.");
|
||||
if (alleleValue == 0)
|
||||
bases.add(new VCFGenotypeEncoding(String.valueOf(referenceBase)));
|
||||
else
|
||||
bases.add(new VCFGenotypeEncoding(altAlleles[alleleValue - 1]));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** @return get the header associated with this reader */
|
||||
|
|
@ -352,11 +101,12 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
|
|||
}
|
||||
|
||||
public void close() {
|
||||
try {
|
||||
mReader.close();
|
||||
if (vcfReader != null) try {
|
||||
vcfReader.close();
|
||||
} catch (IOException e) {
|
||||
// we don't really care
|
||||
Utils.warnUser("Unable to close VCF reader file, this is not fatal, but is worth noting");
|
||||
throw new StingException("Unable to close vcfReader",e);
|
||||
}
|
||||
iterator = null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,658 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* the basic VCF record type
|
||||
*/
|
||||
public class VCFRecord {
|
||||
|
||||
// standard info field keys
|
||||
public static final String ANCESTRAL_ALLELE_KEY = "AA";
|
||||
public static final String ALLELE_COUNT_KEY = "AC";
|
||||
public static final String ALLELE_FREQUENCY_KEY = "AF";
|
||||
public static final String ALLELE_NUMBER_KEY = "AN";
|
||||
public static final String RMS_BASE_QUALITY_KEY = "BQ";
|
||||
public static final String DBSNP_KEY = "DB";
|
||||
public static final String DEPTH_KEY = "DP";
|
||||
public static final String HAPMAP2_KEY = "H2";
|
||||
public static final String HAPMAP3_KEY = "H3";
|
||||
public static final String RMS_MAPPING_QUALITY_KEY = "MQ";
|
||||
public static final String SAMPLE_NUMBER_KEY = "NS";
|
||||
public static final String STRAND_BIAS_KEY = "SB";
|
||||
|
||||
// commonly used strings that are in the standard
|
||||
public static final String FORMAT_FIELD_SEPERATOR = ":";
|
||||
public static final String GENOTYPE_FIELD_SEPERATOR = ":";
|
||||
public static final String FIELD_SEPERATOR = "\t";
|
||||
public static final String FILTER_CODE_SEPERATOR = ";";
|
||||
public static final String INFO_FIELD_SEPERATOR = ";";
|
||||
|
||||
// default values
|
||||
public static final String UNFILTERED = ".";
|
||||
public static final String PASSES_FILTERS = "0";
|
||||
public static final String EMPTY_INFO_FIELD = ".";
|
||||
public static final String EMPTY_ID_FIELD = ".";
|
||||
public static final String EMPTY_ALLELE_FIELD = ".";
|
||||
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
|
||||
public static final int MISSING_GENOTYPE_QUALITY = -1;
|
||||
|
||||
// the reference base
|
||||
private String mReferenceBases;
|
||||
// our location
|
||||
private GenomeLoc mLoc;
|
||||
// our id
|
||||
private String mID;
|
||||
// the alternate bases
|
||||
private final List<VCFGenotypeEncoding> mAlts = new ArrayList<VCFGenotypeEncoding>();
|
||||
// our qual value
|
||||
private double mQual;
|
||||
// our filter string
|
||||
private String mFilterString;
|
||||
// our info fields -- use a TreeMap to ensure they can be pulled out in order (so it passes integration tests)
|
||||
private final Map<String, String> mInfoFields = new TreeMap<String, String>();
|
||||
|
||||
private String mGenotypeFormatString;
|
||||
|
||||
// our genotype sample fields
|
||||
private final List<VCFGenotypeRecord> mGenotypeRecords = new ArrayList<VCFGenotypeRecord>();
|
||||
|
||||
/**
|
||||
* given a reference base, a location, and the format string, create a VCF record.
|
||||
*
|
||||
* @param referenceBases the reference bases to use
|
||||
* @param location our genomic location
|
||||
* @param genotypeFormatString the format string
|
||||
*/
|
||||
public VCFRecord(String referenceBases, GenomeLoc location, String genotypeFormatString) {
|
||||
setReferenceBase(referenceBases);
|
||||
setLocation(location);
|
||||
mGenotypeFormatString = genotypeFormatString;
|
||||
}
|
||||
|
||||
/**
|
||||
* given the values for each of the columns, create a VCF record.
|
||||
*
|
||||
* @param columnValues a mapping of header strings to values
|
||||
* @param genotypeFormatString the format string for the genotype records
|
||||
* @param genotypeRecords the genotype records
|
||||
*/
|
||||
public VCFRecord(Map<VCFHeader.HEADER_FIELDS, String> columnValues, String genotypeFormatString, List<VCFGenotypeRecord> genotypeRecords) {
|
||||
extractFields(columnValues);
|
||||
mGenotypeRecords.addAll(genotypeRecords);
|
||||
mGenotypeFormatString = genotypeFormatString;
|
||||
}
|
||||
|
||||
/**
|
||||
* given the values for each of the columns, create a VCF record.
|
||||
*
|
||||
* @param columnValues a mapping of header strings to values
|
||||
*/
|
||||
public VCFRecord(Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
|
||||
extractFields(columnValues);
|
||||
mGenotypeFormatString = "";
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF record
|
||||
*
|
||||
* @param referenceBases the reference bases to use
|
||||
* @param contig the contig this variant is on
|
||||
* @param position our position
|
||||
* @param ID our ID string
|
||||
* @param altBases the list of alternate bases
|
||||
* @param qual the qual field
|
||||
* @param filters the filters used on this variant
|
||||
* @param infoFields the information fields
|
||||
* @param genotypeFormatString the format string
|
||||
* @param genotypeObjects the genotype objects
|
||||
*/
|
||||
public VCFRecord(String referenceBases,
|
||||
String contig,
|
||||
long position,
|
||||
String ID,
|
||||
List<VCFGenotypeEncoding> altBases,
|
||||
double qual,
|
||||
String filters,
|
||||
Map<String, String> infoFields,
|
||||
String genotypeFormatString,
|
||||
List<VCFGenotypeRecord> genotypeObjects) {
|
||||
setReferenceBase(referenceBases);
|
||||
setLocation(contig, position);
|
||||
this.mID = ID;
|
||||
for (VCFGenotypeEncoding alt : altBases)
|
||||
this.addAlternateBase(alt);
|
||||
this.setQual(qual);
|
||||
this.setFilterString(filters);
|
||||
this.mInfoFields.putAll(infoFields);
|
||||
this.mGenotypeFormatString = genotypeFormatString;
|
||||
this.mGenotypeRecords.addAll(genotypeObjects);
|
||||
}
|
||||
|
||||
/**
|
||||
* extract the field values from the passed in array
|
||||
*
|
||||
* @param columnValues a map of the header fields to values
|
||||
*/
|
||||
private void extractFields(Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
|
||||
String chrom = null;
|
||||
long position = -1;
|
||||
|
||||
for (VCFHeader.HEADER_FIELDS val : columnValues.keySet()) {
|
||||
switch (val) {
|
||||
case CHROM:
|
||||
chrom = columnValues.get(val);
|
||||
break;
|
||||
case POS:
|
||||
position = Integer.valueOf(columnValues.get(val));
|
||||
break;
|
||||
case ID:
|
||||
setID(columnValues.get(val));
|
||||
break;
|
||||
case REF:
|
||||
if (columnValues.get(val).length() != 1)
|
||||
throw new IllegalArgumentException("Reference base should be a single character");
|
||||
setReferenceBase(columnValues.get(val));
|
||||
break;
|
||||
case ALT:
|
||||
String values[] = columnValues.get(val).split(",");
|
||||
for (String alt : values)
|
||||
addAlternateBase(new VCFGenotypeEncoding(alt));
|
||||
break;
|
||||
case QUAL:
|
||||
setQual(Double.valueOf(columnValues.get(val)));
|
||||
break;
|
||||
case FILTER:
|
||||
setFilterString(columnValues.get(val));
|
||||
break;
|
||||
case INFO:
|
||||
String vals[] = columnValues.get(val).split(";");
|
||||
for (String alt : vals) {
|
||||
if ( alt.equals(EMPTY_INFO_FIELD) )
|
||||
continue;
|
||||
String keyVal[] = alt.split("=");
|
||||
if ( keyVal.length == 1 )
|
||||
addInfoField(keyVal[0], "");
|
||||
else if (keyVal.length == 2)
|
||||
addInfoField(keyVal[0], keyVal[1]);
|
||||
else
|
||||
throw new IllegalArgumentException("info field key-value pair did not parse into key->value pair: " + alt);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
setLocation(chrom, position);
|
||||
}
|
||||
|
||||
/**
|
||||
* do we have genotyping data
|
||||
*
|
||||
* @return true if we have genotyping data, false otherwise
|
||||
*/
|
||||
|
||||
public boolean hasGenotypeData() {
|
||||
return (mGenotypeRecords.size() > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return this VCF record's location
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return mLoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the ID value for this record
|
||||
*/
|
||||
public String getID() {
|
||||
return mID == null ? EMPTY_ID_FIELD : mID;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base
|
||||
*
|
||||
* @return either A, T, C, G, or N
|
||||
*/
|
||||
public String getReference() {
|
||||
return mReferenceBases;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the alternate allele strings
|
||||
*
|
||||
* @return an array of strings representing the alt alleles, or null if there are none
|
||||
*/
|
||||
public List<String> getAlternateAlleleList() {
|
||||
ArrayList<String> alts = new ArrayList<String>();
|
||||
for ( VCFGenotypeEncoding alt : mAlts )
|
||||
alts.add(alt.getBases());
|
||||
return alts;
|
||||
}
|
||||
|
||||
public List<VCFGenotypeEncoding> getAlternateAlleles() {
|
||||
return mAlts;
|
||||
}
|
||||
|
||||
public boolean hasAlternateAllele() {
|
||||
for ( VCFGenotypeEncoding alt : mAlts ) {
|
||||
if ( alt.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isBiallelic() {
|
||||
return getAlternateAlleles().size() == 1;
|
||||
}
|
||||
|
||||
public boolean isReference() {
|
||||
return !hasAlternateAllele();
|
||||
}
|
||||
|
||||
public List<String> getAlleleList() {
|
||||
ArrayList<String> list = new ArrayList<String>();
|
||||
list.add(getReference());
|
||||
list.addAll(getAlternateAlleleList());
|
||||
return list;
|
||||
}
|
||||
|
||||
public double getNonRefAlleleFrequency() {
|
||||
if ( mInfoFields.containsKey(ALLELE_FREQUENCY_KEY) ) {
|
||||
return Double.valueOf(mInfoFields.get(ALLELE_FREQUENCY_KEY));
|
||||
} else {
|
||||
// this is the poor man's AF
|
||||
if ( mInfoFields.containsKey(ALLELE_COUNT_KEY) && mInfoFields.containsKey(ALLELE_NUMBER_KEY)) {
|
||||
String splt[] = mInfoFields.get(ALLELE_COUNT_KEY).split(",");
|
||||
if ( splt.length > 0 ) {
|
||||
return (Double.valueOf(splt[0]) / Double.valueOf(mInfoFields.get(ALLELE_NUMBER_KEY)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
public VCFGenotypeEncoding.TYPE getType() {
|
||||
VCFGenotypeEncoding.TYPE type = mAlts.get(0).getType();
|
||||
for (int i = 1; i < mAlts.size(); i++) {
|
||||
if ( mAlts.get(i).getType() != type )
|
||||
return VCFGenotypeEncoding.TYPE.MIXED; // if we have more than one type, return mixed
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return getType() == VCFGenotypeEncoding.TYPE.DELETION;
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return getType() == VCFGenotypeEncoding.TYPE.INSERTION;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return isDeletion() || isInsertion();
|
||||
}
|
||||
|
||||
public boolean isSNP() {
|
||||
return getType() == VCFGenotypeEncoding.TYPE.SINGLE_BASE;
|
||||
}
|
||||
|
||||
public boolean isNovel() {
|
||||
return ( ! isInDBSNP() ) && ( ! isInHapmap() );
|
||||
}
|
||||
|
||||
public boolean isInDBSNP() {
|
||||
return ( ( mID != null && ! mID.equals(".") ) || ( mInfoFields.get(DBSNP_KEY) != null && mInfoFields.get(DBSNP_KEY).equals("1") ) );
|
||||
}
|
||||
|
||||
public boolean isInHapmap() {
|
||||
if ( mInfoFields.get(HAPMAP2_KEY) != null && mInfoFields.get(HAPMAP2_KEY).equals("1") ) {
|
||||
return true;
|
||||
} else {
|
||||
return ( mInfoFields.get(HAPMAP3_KEY) != null && mInfoFields.get(HAPMAP3_KEY).equals("1") );
|
||||
}
|
||||
}
|
||||
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if ( !isSNP() && !isBiallelic() )
|
||||
throw new IllegalStateException("This record does not represent a SNP");
|
||||
return mAlts.get(0).getBases().charAt(0);
|
||||
}
|
||||
|
||||
public char getReferenceForSNP() {
|
||||
if ( !isSNP() )
|
||||
throw new IllegalStateException("This record does not represent a SNP");
|
||||
return getReference().charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the phred-scaled quality score
|
||||
*/
|
||||
public double getQual() {
|
||||
return mQual;
|
||||
}
|
||||
|
||||
public boolean isMissingQual() {
|
||||
return (int)mQual == MISSING_GENOTYPE_QUALITY;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the -log10PError
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return mQual / 10.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the filter criteria
|
||||
*
|
||||
* @return an array of strings representing the filtering criteria, or UNFILTERED if none are applied
|
||||
*/
|
||||
public String[] getFilteringCodes() {
|
||||
if (mFilterString == null) return new String[]{UNFILTERED};
|
||||
return mFilterString.split(FILTER_CODE_SEPERATOR);
|
||||
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
String[] codes = getFilteringCodes();
|
||||
return !codes[0].equals(UNFILTERED) && !codes[0].equals(PASSES_FILTERS);
|
||||
}
|
||||
|
||||
// public boolean hasFilteringCodes() {
|
||||
// return mFilterString != null;
|
||||
// }
|
||||
|
||||
public String getFilterString() {
|
||||
return mFilterString;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the information key-value pairs as a Map<>
|
||||
*
|
||||
* @return a map, of the info key-value pairs
|
||||
*/
|
||||
public final Map<String, String> getInfoValues() {
|
||||
return mInfoFields;
|
||||
}
|
||||
|
||||
public List<VCFGenotypeRecord> getVCFGenotypeRecords() {
|
||||
return mGenotypeRecords;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a List of the sample names
|
||||
*/
|
||||
public String[] getSampleNames() {
|
||||
String names[] = new String[mGenotypeRecords.size()];
|
||||
for (int i = 0; i < mGenotypeRecords.size(); i++) {
|
||||
names[i] = mGenotypeRecords.get(i).getSampleName();
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
public VCFGenotypeRecord getGenotype(final String sampleName) {
|
||||
for ( VCFGenotypeRecord rec : getVCFGenotypeRecords() ) {
|
||||
if ( rec.getSampleName().equals(sampleName) ) {
|
||||
return rec;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getGenotypeFormatString() {
|
||||
return mGenotypeFormatString;
|
||||
}// the formatting string for our genotype records
|
||||
|
||||
public void setGenotypeFormatString(String newFormatString) {
|
||||
mGenotypeFormatString = newFormatString;
|
||||
}
|
||||
|
||||
public void setReferenceBase(String reference) {
|
||||
mReferenceBases = reference.toUpperCase();
|
||||
}
|
||||
|
||||
public void setLocation(String chrom, long position) {
|
||||
if ( chrom == null )
|
||||
throw new IllegalArgumentException("Chromosomes cannot be missing");
|
||||
if ( position < 0 )
|
||||
throw new IllegalArgumentException("Position values must be greater than 0");
|
||||
mLoc = GenomeLocParser.createGenomeLoc(chrom, position);
|
||||
}
|
||||
|
||||
public void setLocation(GenomeLoc location) {
|
||||
mLoc = location.clone();
|
||||
}
|
||||
|
||||
public void setID(String ID) {
|
||||
mID = ID;
|
||||
}
|
||||
|
||||
public void setQual(double qual) {
|
||||
if ( qual < 0 && (int)qual != MISSING_GENOTYPE_QUALITY )
|
||||
throw new IllegalArgumentException("Qual values cannot be negative unless they are " + MISSING_GENOTYPE_QUALITY + " ('unknown')");
|
||||
mQual = qual;
|
||||
}
|
||||
|
||||
public void setFilterString(String filterString) {
|
||||
mFilterString = filterString;
|
||||
}
|
||||
|
||||
public void addGenotypeRecord(VCFGenotypeRecord mGenotypeRecord) {
|
||||
mGenotypeRecords.add(mGenotypeRecord);
|
||||
}
|
||||
|
||||
public void setGenotypeRecords(List<VCFGenotypeRecord> records) {
|
||||
mGenotypeRecords.clear();
|
||||
for ( VCFGenotypeRecord g : records )
|
||||
addGenotypeRecord(g);
|
||||
}
|
||||
|
||||
/**
|
||||
* add an alternate base to our alternate base list. All bases are uppercased
|
||||
* before being added to the list.
|
||||
*
|
||||
* @param base the base to add
|
||||
*/
|
||||
public void addAlternateBase(VCFGenotypeEncoding base) {
|
||||
if (!mAlts.contains(base)) mAlts.add(base);
|
||||
}
|
||||
|
||||
public void setAlternateBases(List<VCFGenotypeEncoding> bases) {
|
||||
mAlts.clear();
|
||||
for ( VCFGenotypeEncoding e : bases )
|
||||
addAlternateBase(e);
|
||||
}
|
||||
|
||||
/**
|
||||
* add an info field to the record
|
||||
*
|
||||
* @param key the key, from the spec or a user created key
|
||||
* @param value it's value as a string
|
||||
*/
|
||||
public void addInfoField(String key, String value) {
|
||||
//System.out.printf("Adding info field %s=%s%n", key, value);
|
||||
mInfoFields.put(key, value);
|
||||
}
|
||||
|
||||
public void printInfoFields() {
|
||||
for ( Map.Entry<String, String> e : mInfoFields.entrySet() ) {
|
||||
System.out.printf(" Current info field %s=%s this=%s%n", e.getKey(), e.getValue(), this);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* add an info field to the record
|
||||
*
|
||||
* @param m A map from info keys to info values
|
||||
*/
|
||||
public void addInfoFields(Map<String,String> m) {
|
||||
for ( Map.Entry<String, String> e : m.entrySet() )
|
||||
addInfoField(e.getKey(), e.getValue());
|
||||
}
|
||||
|
||||
/**
|
||||
* the generation of a string representation, which is used by the VCF writer
|
||||
*
|
||||
* @param header the VCF header for this VCF Record
|
||||
* @return a string
|
||||
*/
|
||||
public String toStringEncoding(VCFHeader header) {
|
||||
return toStringEncoding(header, VCFGenotypeWriter.VALIDATION_STRINGENCY.STRICT);
|
||||
}
|
||||
|
||||
/**
|
||||
* the generation of a string representation, which is used by the VCF writer
|
||||
*
|
||||
* @param header the VCF header for this VCF Record
|
||||
* @param validationStringency the validation stringency
|
||||
* @return a string
|
||||
*/
|
||||
public String toStringEncoding(VCFHeader header, VCFGenotypeWriter.VALIDATION_STRINGENCY validationStringency) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
// CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
|
||||
builder.append(mLoc.getContig());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(mLoc.getStart());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(getID());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(getReference());
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
List<VCFGenotypeEncoding> alts = getAlternateAlleles();
|
||||
if ( alts.size() > 0 ) {
|
||||
builder.append(alts.get(0));
|
||||
for ( int i = 1; i < alts.size(); i++ ) {
|
||||
builder.append(",");
|
||||
builder.append(alts.get(i));
|
||||
}
|
||||
} else {
|
||||
builder.append(EMPTY_ALLELE_FIELD);
|
||||
}
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
if ( (int)mQual == MISSING_GENOTYPE_QUALITY )
|
||||
builder.append(MISSING_GENOTYPE_QUALITY);
|
||||
else
|
||||
builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING, mQual));
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes()));
|
||||
builder.append(FIELD_SEPERATOR);
|
||||
builder.append(createInfoString());
|
||||
|
||||
if ( mGenotypeFormatString != null && mGenotypeFormatString.length() > 0 ) {
|
||||
// try {
|
||||
addGenotypeData(builder, header);
|
||||
// } catch (Exception e) {
|
||||
// if ( validationStringency == VCFGenotypeWriter.VALIDATION_STRINGENCY.STRICT )
|
||||
// throw new RuntimeException(e);
|
||||
// }
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* create the info string
|
||||
*
|
||||
* @return a string representing the infomation fields
|
||||
*/
|
||||
protected String createInfoString() {
|
||||
StringBuffer info = new StringBuffer();
|
||||
boolean isFirst = true;
|
||||
for (String str : mInfoFields.keySet()) {
|
||||
if ( isFirst )
|
||||
isFirst = false;
|
||||
else
|
||||
info.append(INFO_FIELD_SEPERATOR);
|
||||
info.append(str);
|
||||
info.append("=");
|
||||
info.append(mInfoFields.get(str));
|
||||
}
|
||||
return info.length() == 0 ? EMPTY_INFO_FIELD : info.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* add the genotype data
|
||||
*
|
||||
* @param builder the string builder
|
||||
* @param header the header object
|
||||
*/
|
||||
private void addGenotypeData(StringBuilder builder, VCFHeader header) {
|
||||
Map<String, VCFGenotypeRecord> gMap = genotypeListToMap(getVCFGenotypeRecords());
|
||||
|
||||
StringBuffer tempStr = new StringBuffer();
|
||||
if ( header.getGenotypeSamples().size() < getVCFGenotypeRecords().size() ) {
|
||||
for ( String sample : gMap.keySet() ) {
|
||||
if ( !header.getGenotypeSamples().contains(sample) )
|
||||
System.err.println("Sample " + sample + " is a duplicate or is otherwise not present in the header");
|
||||
else
|
||||
header.getGenotypeSamples().remove(sample);
|
||||
}
|
||||
throw new IllegalStateException("We have more genotype samples than the header specified; please check that samples aren't duplicated");
|
||||
}
|
||||
tempStr.append(FIELD_SEPERATOR + mGenotypeFormatString);
|
||||
|
||||
String[] genotypeFormatStrings = mGenotypeFormatString.split(":");
|
||||
|
||||
for ( String genotype : header.getGenotypeSamples() ) {
|
||||
tempStr.append(FIELD_SEPERATOR);
|
||||
if ( gMap.containsKey(genotype) ) {
|
||||
VCFGenotypeRecord rec = gMap.get(genotype);
|
||||
tempStr.append(rec.toStringEncoding(mAlts, genotypeFormatStrings));
|
||||
gMap.remove(genotype);
|
||||
} else {
|
||||
tempStr.append(VCFGenotypeRecord.stringEncodingForEmptyGenotype(genotypeFormatStrings));
|
||||
}
|
||||
}
|
||||
if ( gMap.size() != 0 ) {
|
||||
for ( String sample : gMap.keySet() )
|
||||
System.err.println("Sample " + sample + " is being genotyped but isn't in the header.");
|
||||
throw new IllegalStateException("We failed to use all the genotype samples; there must be an inconsistancy between the header and records");
|
||||
}
|
||||
|
||||
builder.append(tempStr);
|
||||
}
|
||||
|
||||
/**
|
||||
* compare two VCF records
|
||||
*
|
||||
* @param other the other VCF record
|
||||
* @return true if they're equal
|
||||
*/
|
||||
public boolean equals(VCFRecord other) {
|
||||
if (!this.mAlts.equals(other.mAlts)) return false;
|
||||
if (!this.mReferenceBases.equals(other.mReferenceBases)) return false;
|
||||
if (!this.mLoc.equals(other.mLoc)) return false;
|
||||
if (!this.mID.equals(other.mID)) return false;
|
||||
if (this.mQual != other.mQual) return false;
|
||||
if ( this.mFilterString == null ) {
|
||||
if ( other.mFilterString != null ) return false;
|
||||
} else if ( !this.mFilterString.equals(other.mFilterString) ) return false;
|
||||
if (!this.mInfoFields.equals(other.mInfoFields)) return false;
|
||||
if (!this.mGenotypeRecords.equals(other.mGenotypeRecords)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype mapping from a list and their sample names
|
||||
*
|
||||
* @param list a list of genotype samples
|
||||
* @return a mapping of the sample name to VCF genotype record
|
||||
*/
|
||||
private static Map<String, VCFGenotypeRecord> genotypeListToMap(List<VCFGenotypeRecord> list) {
|
||||
Map<String, VCFGenotypeRecord> map = new HashMap<String, VCFGenotypeRecord>();
|
||||
for (int i = 0; i < list.size(); i++) {
|
||||
VCFGenotypeRecord rec = list.get(i);
|
||||
map.put(rec.getSampleName(), rec);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -25,10 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
|
|
@ -48,7 +49,7 @@ public class VCFUtils {
|
|||
|
||||
for ( ReferenceOrderedDataSource source : toolkit.getRodDataSources() ) {
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getType().equals(RodVCF.class) ) {
|
||||
if ( rod.getType().equals(VCFRecord.class) ) {
|
||||
vcfs.add(rod);
|
||||
}
|
||||
}
|
||||
|
|
@ -72,7 +73,7 @@ public class VCFUtils {
|
|||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getType().equals(RodVCF.class) ) {
|
||||
if ( rod.getType().equals(VCFCodec.class) ) {
|
||||
VCFReader reader = new VCFReader(rod.getFile());
|
||||
fields.addAll(reader.getHeader().getMetaData());
|
||||
reader.close();
|
||||
|
|
@ -89,7 +90,7 @@ public class VCFUtils {
|
|||
* @param rodNamesToSampleNames mapping of rod/sample pairs to new uniquified sample names
|
||||
* @return the new merged vcf record
|
||||
*/
|
||||
public static VCFRecord mergeRecords(List<RodVCF> rods, Map<Pair<String, String>, String> rodNamesToSampleNames) {
|
||||
public static VCFRecord mergeRecords(Map<VCFRecord,String> rods, Map<Pair<String, String>, String> rodNamesToSampleNames) {
|
||||
|
||||
VCFParameters params = new VCFParameters();
|
||||
params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY);
|
||||
|
|
@ -100,14 +101,14 @@ public class VCFUtils {
|
|||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
List<String> filters = new ArrayList<String>();
|
||||
|
||||
for ( RodVCF rod : rods ) {
|
||||
for ( VCFRecord rod : rods.keySet() ) {
|
||||
List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords();
|
||||
for ( VCFGenotypeRecord call : myGenotypes ) {
|
||||
// set the name to be the new uniquified name and add it to the list of genotypes
|
||||
call.setSampleName(rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), call.getSampleName())));
|
||||
call.setSampleName(rodNamesToSampleNames.get(new Pair<String, String>(rods.get(rod), call.getSampleName())));
|
||||
if ( params.getPosition() < 1 )
|
||||
params.setLocations(rod.getLocation(), call.getReference());
|
||||
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord));
|
||||
params.setLocations(GenomeLocParser.createGenomeLoc(rod.getChr(), rod.getStart()), call.getReference());
|
||||
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod));
|
||||
}
|
||||
|
||||
// set the overall confidence to be the max entry we see
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
|
@ -97,7 +101,7 @@ public class VCFWriter {
|
|||
if ( mHeader == null )
|
||||
throw new IllegalStateException("The VCF Header must be written before records can be added");
|
||||
|
||||
String vcfString = record.toStringEncoding(mHeader, validationStringency);
|
||||
String vcfString = record.toStringEncoding(mHeader);
|
||||
try {
|
||||
mWriter.write(vcfString + "\n");
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
|
|
|
|||
|
|
@ -1,181 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.junit.Assert;
|
||||
import static org.junit.Assert.fail;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class RodVCFUnitTest
|
||||
* <p/>
|
||||
* test out the rod VCF
|
||||
*/
|
||||
public class RodVCFUnitTest extends BaseTest {
|
||||
|
||||
private static File vcfFile = new File(validationDataLocation + "vcfexample.vcf");
|
||||
private VCFHeader mHeader;
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
IndexedFastaSequenceFile seq;
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
}
|
||||
|
||||
private RodVCF getVCFObject() {
|
||||
RodVCF vcf = new RodVCF("VCF");
|
||||
mHeader = null;
|
||||
try {
|
||||
mHeader = (VCFHeader) vcf.initialize(vcfFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
fail("Unable to open VCF file");
|
||||
}
|
||||
mHeader.checkVCFVersion();
|
||||
return vcf;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInitialize() {
|
||||
getVCFObject();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToIterator() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
VCFReader reader = new VCFReader(vcfFile);
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
while (iter.hasNext()) {
|
||||
VCFRecord rec1 = reader.next();
|
||||
VCFRecord rec2 = iter.next().mCurrentRecord;
|
||||
if (!rec1.equals(rec2)) {
|
||||
fail("VCF record rec1 != rec2");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToMAF() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.786, 0.00001);
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.0, 0.00001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToString() {
|
||||
// slightly altered line, due to map ordering
|
||||
final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tAF=0.786;DP=258;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:\n";
|
||||
RodVCF vcf = getVCFObject();
|
||||
VCFReader reader = new VCFReader(vcfFile);
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
boolean first = true;
|
||||
while (iter.hasNext()) {
|
||||
VCFRecord rec1 = reader.next();
|
||||
VCFRecord rec2 = iter.next().mCurrentRecord;
|
||||
if (!rec1.toStringEncoding(mHeader).equals(rec2.toStringEncoding(mHeader))) {
|
||||
fail("VCF record rec1.toStringEncoding() != rec2.toStringEncoding()");
|
||||
}
|
||||
// verify the first line too
|
||||
if (first) {
|
||||
if (!firstLine.equals(rec1.toStringEncoding(mHeader) + "\n")) {
|
||||
fail("VCF record rec1.toStringEncoding() != expected string :\n" + rec1.toStringEncoding(mHeader) + "\n" + firstLine);
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInsertion() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
Assert.assertTrue(rec.isSNP());
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
Assert.assertTrue(rec.isIndel());
|
||||
Assert.assertTrue(rec.isDeletion());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDeletion() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
Assert.assertTrue(rec.isSNP());
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
rec = iter.next();
|
||||
|
||||
Assert.assertTrue(rec.isIndel());
|
||||
Assert.assertTrue(rec.isInsertion());
|
||||
}
|
||||
@Test
|
||||
public void testGetGenotypes() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
// we should get back a ref 'G' and an alt 'A'
|
||||
List<VCFGenotypeRecord> list = rec.getVCFGenotypeRecords();
|
||||
List<String> knowngenotypes = new ArrayList<String>();
|
||||
knowngenotypes.add("GG");
|
||||
knowngenotypes.add("AG");
|
||||
knowngenotypes.add("AA");
|
||||
Assert.assertEquals(3, list.size());
|
||||
for (VCFGenotypeRecord g: list) {
|
||||
Assert.assertTrue(knowngenotypes.contains(g.getBases()));
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void testGetGenotypesQual() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
// we should get back a ref 'G' and an alt 'A'
|
||||
List<VCFGenotypeRecord> list = rec.getVCFGenotypeRecords();
|
||||
Assert.assertEquals(4.8,list.get(0).getNegLog10PError(),0.0001);
|
||||
Assert.assertEquals(4.8,list.get(1).getNegLog10PError(),0.0001);
|
||||
Assert.assertEquals(4.3,list.get(2).getNegLog10PError(),0.0001);
|
||||
}
|
||||
@Test
|
||||
public void testGetLocation() {
|
||||
RodVCF vcf = getVCFObject();
|
||||
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
|
||||
RodVCF rec = iter.next();
|
||||
GenomeLoc loc = rec.getLocation();
|
||||
Assert.assertEquals(14370,loc.getStart());
|
||||
Assert.assertEquals(14370,loc.getStop());
|
||||
Assert.assertTrue(loc.getContig().equals("20"));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -27,7 +27,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
|
|||
public void testSimpleVCFPileup() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString1KG() + " -B eval,VCF," + validationDataLocation + "MultiSample.vcf", 1,
|
||||
Arrays.asList("ff4da30a3b6428bc64d61214c715efa5"));
|
||||
Arrays.asList("5d84c75746738833b6c9441d9d614553"));
|
||||
executeTest("testSimpleVCFPileup", spec);
|
||||
}
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
|
|||
baseTestString1KG() + " -B eval,VCF," + validationDataLocation + "MultiSample.vcf" +
|
||||
" -B eval,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf"
|
||||
, 1,
|
||||
Arrays.asList("52f758a70ae603213d4399220151357d"));
|
||||
Arrays.asList("6dd0ed0a6fe7096ccb66beffb8d455da"));
|
||||
executeTest("testComplexVCFPileup", spec);
|
||||
}
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
|
|||
" -B eval,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
|
||||
" -L 1 -L 2 -L 20"
|
||||
, 1,
|
||||
Arrays.asList("421a27be91496e88f2f4e197dcfad0ff"));
|
||||
Arrays.asList("ab3da32eae65e8c15a9f4a787a190a37"));
|
||||
executeTest("testLargeComplexVCFPileup", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,151 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFGenotypeEncodingUnitTest
|
||||
* <p/>
|
||||
* test the VCFGenotypeEncoding class
|
||||
*/
|
||||
public class VCFGenotypeEncodingUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testDecodingSingle() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A");
|
||||
Assert.assertTrue("A".equals(enc.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("C");
|
||||
Assert.assertTrue("C".equals(enc2.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("G");
|
||||
Assert.assertTrue("G".equals(enc3.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc4 = new VCFGenotypeEncoding("T");
|
||||
Assert.assertTrue("T".equals(enc4.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc5 = new VCFGenotypeEncoding("a");
|
||||
Assert.assertTrue("A".equals(enc5.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc6 = new VCFGenotypeEncoding("c");
|
||||
Assert.assertTrue("C".equals(enc6.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc7 = new VCFGenotypeEncoding("g");
|
||||
Assert.assertTrue("G".equals(enc7.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
|
||||
VCFGenotypeEncoding enc8 = new VCFGenotypeEncoding("t");
|
||||
Assert.assertTrue("T".equals(enc8.toString()));
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testDecodingSingleBadBase() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("E");
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testDecodingSingleWrongBase() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("I");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidIndel() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGGC");
|
||||
Assert.assertEquals(4, enc.getLength());
|
||||
Assert.assertTrue(enc.getBases().equals("AGGC"));
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.INSERTION, enc.getType());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testBadIndel() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGRC");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidDel() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("D40");
|
||||
Assert.assertEquals(40, enc.getLength());
|
||||
Assert.assertTrue(enc.getBases().equals(""));
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.DELETION, enc.getType());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testBadDel() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("DAGCT");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidNoCall() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding(".");
|
||||
Assert.assertEquals(0, enc.getLength());
|
||||
Assert.assertTrue(enc.getBases().equals("."));
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.UNCALLED, enc.getType());
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testBadNoCall() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("..");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEquals() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A");
|
||||
VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A");
|
||||
VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C");
|
||||
Assert.assertTrue(enc.equals(enc2));
|
||||
Assert.assertTrue(!enc.equals(enc3));
|
||||
enc = new VCFGenotypeEncoding("D40");
|
||||
enc2 = new VCFGenotypeEncoding("D40");
|
||||
enc3 = new VCFGenotypeEncoding("D41");
|
||||
Assert.assertTrue(enc.equals(enc2));
|
||||
Assert.assertTrue(!enc.equals(enc3));
|
||||
enc = new VCFGenotypeEncoding("IAAC");
|
||||
enc2 = new VCFGenotypeEncoding("IAAC");
|
||||
enc3 = new VCFGenotypeEncoding("IACG");
|
||||
Assert.assertTrue(enc.equals(enc2));
|
||||
Assert.assertTrue(!enc.equals(enc3));
|
||||
enc = new VCFGenotypeEncoding(".");
|
||||
enc2 = new VCFGenotypeEncoding(".");
|
||||
Assert.assertTrue(enc.equals(enc2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHashCode() {
|
||||
VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A");
|
||||
VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A");
|
||||
VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C");
|
||||
Assert.assertTrue(enc.hashCode() == enc2.hashCode());
|
||||
Assert.assertTrue(enc.hashCode() != enc3.hashCode());
|
||||
enc = new VCFGenotypeEncoding("D40");
|
||||
enc2 = new VCFGenotypeEncoding("D40");
|
||||
enc3 = new VCFGenotypeEncoding("D41");
|
||||
Assert.assertTrue(enc.hashCode() == enc2.hashCode());
|
||||
Assert.assertTrue(enc.hashCode() != enc3.hashCode());
|
||||
enc = new VCFGenotypeEncoding("IAAC");
|
||||
enc2 = new VCFGenotypeEncoding("IAAC");
|
||||
enc3 = new VCFGenotypeEncoding("IACG");
|
||||
Assert.assertTrue(enc.hashCode() == enc2.hashCode());
|
||||
Assert.assertTrue(enc.hashCode() != enc3.hashCode());
|
||||
enc = new VCFGenotypeEncoding(".");
|
||||
enc2 = new VCFGenotypeEncoding(".");
|
||||
Assert.assertTrue(enc.hashCode() == enc2.hashCode());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class VCFHeaderUnitTest
|
||||
*
|
||||
* Test the VCF Header class
|
||||
*/
|
||||
public class VCFHeaderUnitTest extends BaseTest {
|
||||
|
||||
private Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
private Set<String> additionalColumns = new HashSet<String>();
|
||||
|
||||
/**
|
||||
* give it fake data, and make sure we get back the right fake data
|
||||
*/
|
||||
@Test
|
||||
public void testHeaderConstructor() {
|
||||
metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
|
||||
metaData.add(new VCFHeaderLine("two", "2"));
|
||||
additionalColumns.add("extra1");
|
||||
additionalColumns.add("extra2");
|
||||
// this should create a header that is valid
|
||||
|
||||
VCFHeader header = new VCFHeader(metaData, additionalColumns);
|
||||
|
||||
// check the fields
|
||||
int index = 0;
|
||||
for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
|
||||
Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index],field);
|
||||
index++;
|
||||
}
|
||||
Assert.assertEquals(metaData.size(), header.getMetaData().size());
|
||||
index = 0;
|
||||
for (String key: header.getGenotypeSamples()) {
|
||||
Assert.assertTrue(additionalColumns.contains(key));
|
||||
index++;
|
||||
}
|
||||
Assert.assertEquals(additionalColumns.size(),index);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class VCFIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void test1() {
|
||||
// Read in and then emit each record
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T PrintRODs -R " + oneKGLocation + "reference/human_b36_both.fasta -L 1:10,000,000-10,050,000 -o %s -B vcf,VCF," + validationDataLocation + "complexExample.vcf", 1,
|
||||
Arrays.asList("68b123acca4975553297fcd776c70464"));
|
||||
executeTest("test vcf", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,366 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/** test the VCFReader class test */
|
||||
public class VCFReaderUnitTest extends BaseTest {
|
||||
|
||||
private static final File vcfFile = new File(validationDataLocation + "vcfexample.vcf");
|
||||
private static final File multiSampleVCF = new File(validationDataLocation + "MultiSample.vcf");
|
||||
private static final String VCF_MIXUP_FILE = validationDataLocation + "mixedup.v2.vcf";
|
||||
private static final File complexFile = new File(validationDataLocation + "complexExample.vcf");
|
||||
private static final File headerNoRecordsFile = new File(validationDataLocation + "headerNoRecords.vcf");
|
||||
private static final File headerSampleSpaceFile = new File(validationDataLocation + "headerSampleSpaceFile.vcf");
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVCFInput() {
|
||||
VCFReader reader = new VCFReader(vcfFile);
|
||||
int counter = 0;
|
||||
while (reader.hasNext()) {
|
||||
counter++;
|
||||
reader.next();
|
||||
}
|
||||
Assert.assertEquals(6, counter);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultiSampleVCFInput() {
|
||||
VCFReader reader = new VCFReader(multiSampleVCF);
|
||||
int counter = 0;
|
||||
while (reader.hasNext()) {
|
||||
counter++;
|
||||
reader.next();
|
||||
}
|
||||
Assert.assertEquals(99, counter);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoCallSites() {
|
||||
VCFReader reader = new VCFReader(multiSampleVCF);
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
VCFRecord rec = reader.next();
|
||||
final int numberOfNoCallsTruth = 9;
|
||||
int noCalls = 0;
|
||||
for (VCFGenotypeRecord record : rec.getVCFGenotypeRecords()) {
|
||||
List<VCFGenotypeEncoding> encodings = record.getAlleles();
|
||||
if (encodings.get(0).getType() == VCFGenotypeEncoding.TYPE.UNCALLED &&
|
||||
encodings.get(1).getType() == VCFGenotypeEncoding.TYPE.UNCALLED)
|
||||
noCalls++;
|
||||
}
|
||||
Assert.assertEquals(numberOfNoCallsTruth, noCalls);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testKnownCallSites() {
|
||||
VCFReader reader = new VCFReader(multiSampleVCF);
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
VCFRecord rec = reader.next();
|
||||
boolean seenNA11992 = false;
|
||||
boolean seenNA12287 = false;
|
||||
for (VCFGenotypeRecord record : rec.getVCFGenotypeRecords()) {
|
||||
if (record.getSampleName().equals("NA11992")) {
|
||||
List<VCFGenotypeEncoding> encodings = record.getAlleles();
|
||||
if (!encodings.get(0).getBases().equals("A") ||
|
||||
!encodings.get(1).getBases().equals("A")) {
|
||||
Assert.fail("Sample NA11992 at site 1:10000005 should be AA");
|
||||
}
|
||||
seenNA11992 = true;
|
||||
}
|
||||
if (record.getSampleName().equals("NA12287")) {
|
||||
List<VCFGenotypeEncoding> encodings = record.getAlleles();
|
||||
if (!encodings.get(0).getBases().equals("A") ||
|
||||
!encodings.get(1).getBases().equals("T")) {
|
||||
Assert.fail("Sample NA11992 at site 1:10000005 should be AA");
|
||||
}
|
||||
seenNA12287 = true;
|
||||
}
|
||||
}
|
||||
Assert.assertTrue(seenNA11992 && seenNA12287);
|
||||
}
|
||||
|
||||
/** test the basic parsing */
|
||||
@Test
|
||||
public void testBasicParsing() {
|
||||
String formatString = "GT:B:C:D";
|
||||
String genotypeString = "0|1:2:3:4";
|
||||
String altAlleles[] = {"A", "G", "T"};
|
||||
char referenceBase = 'C';
|
||||
VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test", formatString, genotypeString, altAlleles, referenceBase);
|
||||
Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED, rec.getPhaseType());
|
||||
Assert.assertEquals("C", rec.getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A", rec.getAlleles().get(1).toString());
|
||||
Map<String, String> values = rec.getFields();
|
||||
Assert.assertEquals(3, values.size());
|
||||
Assert.assertTrue(values.get("B").equals("2"));
|
||||
Assert.assertTrue(values.get("C").equals("3"));
|
||||
Assert.assertTrue(values.get("D").equals("4"));
|
||||
}
|
||||
|
||||
|
||||
/** test the parsing of a genotype field with missing parameters */
|
||||
@Test
|
||||
public void testMissingFieldParsing() {
|
||||
String formatString = "GT:B:C:D";
|
||||
String genotypeString = "0|1:::4";
|
||||
String altAlleles[] = {"A", "G", "T"};
|
||||
char referenceBase = 'C';
|
||||
VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test", formatString, genotypeString, altAlleles, referenceBase);
|
||||
Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED, rec.getPhaseType());
|
||||
Assert.assertEquals("C", rec.getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A", rec.getAlleles().get(1).toString());
|
||||
Map<String, String> values = rec.getFields();
|
||||
Assert.assertEquals(3, values.size());
|
||||
Assert.assertTrue(values.get("B").equals(""));
|
||||
Assert.assertTrue(values.get("C").equals(""));
|
||||
Assert.assertTrue(values.get("D").equals("4"));
|
||||
}
|
||||
|
||||
/** test the parsing of a genotype field with different missing parameters */
|
||||
@Test
|
||||
public void testMissingAllFields() {
|
||||
String formatString = "GT:B:C:D";
|
||||
String genotypeString = "0|1:::";
|
||||
String altAlleles[] = {"A", "G", "T"};
|
||||
char referenceBase = 'C';
|
||||
VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test", formatString, genotypeString, altAlleles, referenceBase);
|
||||
Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED, rec.getPhaseType());
|
||||
Assert.assertEquals("C", rec.getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A", rec.getAlleles().get(1).toString());
|
||||
Map<String, String> values = rec.getFields();
|
||||
Assert.assertEquals(3, values.size());
|
||||
Assert.assertTrue(values.get("B").equals(""));
|
||||
Assert.assertTrue(values.get("C").equals(""));
|
||||
Assert.assertTrue(values.get("D").equals(""));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKiransOutOfOrderExample() {
|
||||
ArrayList<String> header = new ArrayList<String>();
|
||||
ArrayList<String> variant = new ArrayList<String>();
|
||||
|
||||
try {
|
||||
FileReader reader = new FileReader(VCF_MIXUP_FILE);
|
||||
BufferedReader breader = new BufferedReader(reader);
|
||||
String line;
|
||||
while ((line = breader.readLine()) != null) {
|
||||
String[] pieces = line.split("\t");
|
||||
|
||||
if (line.contains("##")) {
|
||||
continue;
|
||||
} else if (line.contains("#CHROM")) {
|
||||
for (int i = 0; i < pieces.length; i++) {
|
||||
header.add(i, pieces[i]);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < pieces.length; i++) {
|
||||
variant.add(i, pieces[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException e) {
|
||||
Assert.fail("Unable to open the mixed up VCF file.");
|
||||
} catch (IOException e) {
|
||||
Assert.fail("Unable to read from the mixed up VCF file.");
|
||||
}
|
||||
|
||||
// now load up a ROD
|
||||
Iterator<RodVCF> rod = RodVCF.createIterator("name",new File(VCF_MIXUP_FILE));
|
||||
RodVCF newRod = rod.next();
|
||||
List<VCFGenotypeRecord> records = newRod.mCurrentRecord.getVCFGenotypeRecords();
|
||||
for (VCFGenotypeRecord record: records) {
|
||||
if (!header.contains(record.getSampleName())) {
|
||||
Assert.fail("Parsed header doesn't contain field " + record.getSampleName());
|
||||
}
|
||||
if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":")))) {
|
||||
Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":"))
|
||||
+ " != " + variant.get(header.indexOf(record.getSampleName())));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexExample() {
|
||||
VCFReader reader = new VCFReader(complexFile);
|
||||
|
||||
// test header
|
||||
Set<VCFHeaderLine> headerLines = reader.getHeader().getMetaData();
|
||||
int formatCount = 0;
|
||||
int infoCount = 0;
|
||||
int filterCount = 0;
|
||||
for ( VCFHeaderLine line : headerLines ) {
|
||||
if ( line instanceof VCFFormatHeaderLine )
|
||||
formatCount++;
|
||||
else if ( line instanceof VCFInfoHeaderLine )
|
||||
infoCount++;
|
||||
else if ( line instanceof VCFFilterHeaderLine )
|
||||
filterCount++;
|
||||
}
|
||||
Assert.assertEquals(3, formatCount);
|
||||
Assert.assertEquals(4, infoCount);
|
||||
Assert.assertEquals(0, filterCount);
|
||||
|
||||
// record #1: test dbsnp id
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
VCFRecord rec = reader.next();
|
||||
Assert.assertTrue(rec.getID().equals("testid1"));
|
||||
List<VCFGenotypeRecord> grecords = rec.getVCFGenotypeRecords();
|
||||
int counter = 0;
|
||||
for ( VCFGenotypeRecord grec : grecords ) {
|
||||
if ( grec.isEmptyGenotype() )
|
||||
counter++;
|
||||
}
|
||||
Assert.assertEquals(2, counter);
|
||||
|
||||
// record #2: test info field
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
Assert.assertTrue(rec.getInfoValues().get("RMSMAPQ").equals("81.91"));
|
||||
|
||||
// record #3: test alternate alleles
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
List<String> alts = rec.getAlternateAlleleList();
|
||||
Assert.assertTrue((alts.get(0).equals("T") && alts.get(1).equals("C")) || (alts.get(0).equals("C") && alts.get(1).equals("T")));
|
||||
|
||||
// record #4: test max GQ value and read count
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
grecords = rec.getVCFGenotypeRecords();
|
||||
for ( VCFGenotypeRecord grec : grecords ) {
|
||||
if ( !grec.isEmptyGenotype() ) {
|
||||
Assert.assertTrue(grec.getFields().get("GQ").equals("2000000"));
|
||||
Assert.assertEquals(7, grec.getReadCount());
|
||||
}
|
||||
}
|
||||
|
||||
// record #5: test more alternate alleles
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
alts = rec.getAlternateAlleleList();
|
||||
Assert.assertEquals(alts.size(), 3);
|
||||
grecords = rec.getVCFGenotypeRecords();
|
||||
boolean[] seen = new boolean[3];
|
||||
for ( VCFGenotypeRecord grec : grecords ) {
|
||||
if ( grec.getBases().equals("CC") ) {
|
||||
Assert.assertEquals(27, grec.getReadCount());
|
||||
seen[0] = true;
|
||||
} else if ( grec.getBases().equals("AA") ) {
|
||||
Assert.assertEquals(26, grec.getReadCount());
|
||||
seen[1] = true;
|
||||
} else if ( grec.getBases().equals("GT") || grec.getBases().equals("TG") ) {
|
||||
Assert.assertEquals(16, grec.getReadCount());
|
||||
seen[2] = true;
|
||||
}
|
||||
}
|
||||
Assert.assertTrue(seen[0] && seen[1] && seen[2]);
|
||||
|
||||
// record #6: toVariation
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
grecords = rec.getVCFGenotypeRecords();
|
||||
for ( VCFGenotypeRecord grec : grecords ) {
|
||||
if ( !grec.isEmptyGenotype() ) {
|
||||
Assert.assertTrue(grec.isVariant(rec.getReference().charAt(0)));
|
||||
Assert.assertEquals(rec, grec.getRecord());
|
||||
}
|
||||
}
|
||||
|
||||
// record #7: default values, isVariation, and isHom/isHet
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
grecords = rec.getVCFGenotypeRecords();
|
||||
for ( VCFGenotypeRecord grec : grecords ) {
|
||||
if ( !grec.isVariant(rec.getReference().charAt(0)) ) {
|
||||
Assert.assertTrue(grec.isHom());
|
||||
Assert.assertTrue(grec.getFields().get("GQ").equals("-1"));
|
||||
Assert.assertEquals(-1, grec.getReadCount());
|
||||
} else {
|
||||
Assert.assertTrue(grec.isHet());
|
||||
if ( grec.getReadCount() == 4 )
|
||||
Assert.assertTrue(grec.getFields().get("GQ").equals("-1"));
|
||||
else
|
||||
Assert.assertTrue(grec.getFields().get("GQ").equals("5.85") && grec.getReadCount() == -1);
|
||||
}
|
||||
}
|
||||
|
||||
// record #8: filtered and type
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
Assert.assertTrue(!rec.isFiltered());
|
||||
Assert.assertTrue(rec.getFilterString().equals("."));
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, rec.getType());
|
||||
|
||||
// record #9: deletion
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.DELETION, rec.getType());
|
||||
Assert.assertEquals(1, rec.getAlternateAlleleList().size());
|
||||
Assert.assertTrue(rec.getAlternateAlleleList().get(0).equals(""));
|
||||
|
||||
// record #10: insertion
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
Assert.assertEquals(VCFGenotypeEncoding.TYPE.INSERTION, rec.getType());
|
||||
Assert.assertEquals(rec.getAlternateAlleleList().size(), 1);
|
||||
Assert.assertTrue(rec.getAlternateAlleleList().get(0).equals("CAT"));
|
||||
|
||||
// record #11: more filters
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
Assert.assertTrue(rec.isFiltered());
|
||||
Assert.assertTrue(rec.getFilterString().equals("foo"));
|
||||
|
||||
// record #12: yet more filters
|
||||
if (!reader.hasNext()) Assert.fail("The reader should have a record");
|
||||
rec = reader.next();
|
||||
Assert.assertTrue(rec.isFiltered());
|
||||
Assert.assertTrue(rec.getFilterString().equals("bar;baz"));
|
||||
|
||||
if (reader.hasNext()) Assert.fail("The reader should NOT have a record");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeaderNoRecords() {
|
||||
VCFReader reader = new VCFReader(headerNoRecordsFile);
|
||||
Assert.assertTrue(reader.getHeader().getMetaData() != null);
|
||||
Assert.assertTrue(!reader.iterator().hasNext());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeaderSampleSpaceFile() {
|
||||
VCFReader reader = new VCFReader(headerSampleSpaceFile);
|
||||
Assert.assertTrue(reader.getHeader().hasGenotypingData());
|
||||
Assert.assertTrue(reader.getHeader().getGenotypeSamples().size() == 1);
|
||||
Assert.assertTrue(reader.getHeader().getGenotypeSamples().contains("SAMPLE NAME"));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,169 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFRecordUnitTest
|
||||
* <p/>
|
||||
* test the basic functionality of the vcf record
|
||||
*/
|
||||
public class VCFRecordUnitTest extends BaseTest {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create a fake VCF record
|
||||
*
|
||||
* @param infoFields the info fields
|
||||
* @return a VCFRecord
|
||||
*/
|
||||
private static VCFRecord makeFakeVCFRecord(Map<String, String> infoFields) {
|
||||
List<VCFGenotypeEncoding> altBases = new ArrayList<VCFGenotypeEncoding>();
|
||||
altBases.add(new VCFGenotypeEncoding("C"));
|
||||
altBases.add(new VCFGenotypeEncoding("D1"));
|
||||
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
|
||||
genotypeObjects.add(createGenotype("sample1", "A", "A"));
|
||||
return new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a fake VCF genotype record
|
||||
*
|
||||
* @param name the name of the sample
|
||||
* @param Allele1 the first allele
|
||||
* @param Allele2 the second allele
|
||||
* @return a VCFGenotypeRecord
|
||||
*/
|
||||
private static VCFGenotypeRecord createGenotype(String name, String Allele1, String Allele2) {
|
||||
List<VCFGenotypeEncoding> Alleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
Alleles.add(new VCFGenotypeEncoding(Allele1));
|
||||
Alleles.add(new VCFGenotypeEncoding(Allele2));
|
||||
VCFGenotypeRecord rec = new VCFGenotypeRecord(name, Alleles, VCFGenotypeRecord.PHASE.PHASED);
|
||||
rec.setField("AA", "2");
|
||||
return rec;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddReduntantAlts() {
|
||||
List<VCFGenotypeEncoding> altBases = new ArrayList<VCFGenotypeEncoding>();
|
||||
altBases.add(new VCFGenotypeEncoding("C"));
|
||||
altBases.add(new VCFGenotypeEncoding("D1"));
|
||||
altBases.add(new VCFGenotypeEncoding("D1"));
|
||||
List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
|
||||
genotypeObjects.add(createGenotype("sample1", "A", "A"));
|
||||
VCFRecord rec = new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap<String,String>(), "GT:AA", genotypeObjects);
|
||||
Assert.assertEquals(2, rec.getAlternateAlleles().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetOneGenotype() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
List<VCFGenotypeRecord> genotypeObjects = rec.getVCFGenotypeRecords();
|
||||
Assert.assertEquals(1, genotypeObjects.size());
|
||||
Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1"));
|
||||
Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size());
|
||||
Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetGenotypes() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
rec.addGenotypeRecord(createGenotype("sample2", "C", "A"));
|
||||
List<VCFGenotypeRecord> genotypeObjects = rec.getVCFGenotypeRecords();
|
||||
Assert.assertEquals(2, genotypeObjects.size());
|
||||
Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1"));
|
||||
Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size());
|
||||
Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString());
|
||||
|
||||
// assert the second one
|
||||
Assert.assertTrue(genotypeObjects.get(1).getSampleName().equals("sample2"));
|
||||
Assert.assertEquals(2, genotypeObjects.get(1).getAlleles().size());
|
||||
Assert.assertEquals("C", genotypeObjects.get(1).getAlleles().get(0).toString());
|
||||
Assert.assertEquals("A", genotypeObjects.get(1).getAlleles().get(1).toString());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateInfoString() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
Assert.assertTrue(rec.createInfoString().equals("."));
|
||||
infoFields.put("DP", "50");
|
||||
VCFRecord rec2 = makeFakeVCFRecord(infoFields);
|
||||
Assert.assertTrue(rec2.createInfoString().equals("DP=50"));
|
||||
rec2.addInfoField("AB", "CD");
|
||||
Assert.assertTrue(rec2.createInfoString().equals("DP=50;AB=CD") || rec2.createInfoString().equals("AB=CD;DP=50"));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testAddAlts() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
rec.addAlternateBase(new VCFGenotypeEncoding("T"));
|
||||
Assert.assertEquals(3,rec.getAlternateAlleles().size());
|
||||
}
|
||||
|
||||
/**
|
||||
* create a fake header of known quantity
|
||||
*
|
||||
* @return a fake VCF header
|
||||
*/
|
||||
public static VCFHeader createFakeHeader() {
|
||||
Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
|
||||
metaData.add(new VCFHeaderLine("two", "2"));
|
||||
Set<String> additionalColumns = new HashSet<String>();
|
||||
additionalColumns.add("FORMAT");
|
||||
additionalColumns.add("sample1");
|
||||
return new VCFHeader(metaData, additionalColumns);
|
||||
}
|
||||
|
||||
private static final String stringRep = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tDP=50\tGT:AA\t0|0:2";
|
||||
private static final String stringRep2 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2";
|
||||
//private static final String stringRep3 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2";
|
||||
|
||||
@Test
|
||||
public void testStringRepresentation() {
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
infoFields.put("DP", "50");
|
||||
VCFRecord rec = makeFakeVCFRecord(infoFields);
|
||||
String rep = rec.toStringEncoding(createFakeHeader());
|
||||
Assert.assertTrue(stringRep.equals(rep));
|
||||
rec.addInfoField("AB", "CD");
|
||||
String rep2 = rec.toStringEncoding(createFakeHeader());
|
||||
Assert.assertTrue(stringRep2.equals(rep2));
|
||||
//rec.addGenotypeField(createGenotype("sample3","A","D12"));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
|
|
|||
Loading…
Reference in New Issue