VCF moved over to tribble.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3302 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-05-05 17:28:48 +00:00
parent ad11201235
commit a68f3b2e9c
98 changed files with 356 additions and 3580 deletions

View File

@ -29,10 +29,10 @@ import java.io.*;
import java.util.Set;
import java.util.HashSet;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import java.io.File;
import java.util.Set;

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;

View File

@ -1,351 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
/**
 * A reference-ordered datum (ROD) backed by a VCF file.
 * <p/>
 * Wraps a {@link VCFReader} plus the record most recently read from it, and
 * exposes that record's fields (alleles, genotypes, filters, etc.) through
 * delegating accessors. The class also acts as an iterator over itself: each
 * call to {@link #next()} advances the underlying reader and returns a new
 * RodVCF positioned at the next record.
 *
 * @author aaron
 */
public class RodVCF extends BasicReferenceOrderedDatum implements Iterator<RodVCF> {
public VCFReader getReader() {
return mReader;
}
// our VCF related information: the open reader backing this ROD
private VCFReader mReader;
public VCFRecord getRecord() {
return mCurrentRecord;
}
// the record this ROD currently points at; null until next() has been called
public VCFRecord mCurrentRecord;
public RodVCF(String name) {
super(name);
}
public RodVCF(String name, VCFRecord currentRecord, VCFReader reader) {
super(name);
mCurrentRecord = currentRecord;
mReader = reader;
}
/**
 * Guard used by the delegating accessors below: they cannot be called
 * before next() (or the three-arg constructor) has set mCurrentRecord.
 */
public void assertNotNull() {
if ( mCurrentRecord == null ) {
throw new UnsupportedOperationException("The current Record is null");
}
}
@Override
public boolean parseLine(Object header, String[] parts) throws IOException {
// parsing is handled entirely by the VCFReader, never line-by-line
throw new UnsupportedOperationException("RodVCF does not support the parseLine method");
}
/**
 * Opens the underlying VCF reader on the given file (idempotent: a
 * previously opened reader is kept).
 *
 * @param source the VCF file to read
 * @return the parsed VCF header
 * @throws FileNotFoundException if the file cannot be opened
 */
public Object initialize(final File source) throws FileNotFoundException {
if ( mReader == null ) {
mReader = new VCFReader(source);
}
return mReader.getHeader();
}
@Override
public String toString() {
// empty string when no record has been read yet
return (mCurrentRecord != null ? mCurrentRecord.toStringEncoding(mReader.getHeader()) : "");
}
/**
 * Creates a RodVCF positioned before the first record of the given file;
 * call next() to read the first record.
 *
 * @param name the ROD binding name
 * @param file the VCF file to iterate over
 * @return an un-advanced RodVCF over the file
 */
public static RodVCF createIterator(String name, File file) {
RodVCF vcf = new RodVCF(name);
try {
vcf.initialize(file);
} catch (FileNotFoundException e) {
throw new StingException("Unable to find file " + file);
}
return vcf;
}
/** @return true if the current record carries a non-reference allele frequency > 0 */
public boolean hasNonRefAlleleFrequency() {
assertNotNull();
return mCurrentRecord.getNonRefAlleleFrequency() > 0.0;
}
/**
 * get the frequency of this variant
 *
 * @return VariantFrequency with the stored frequency
 */
public double getNonRefAlleleFrequency() {
assertNotNull();
return mCurrentRecord.getNonRefAlleleFrequency();
}
/** @return the ID column of the current record (e.g. a dbSNP identifier) */
public String getID() {
assertNotNull();
return mCurrentRecord.getID();
}
/**
 * are we a SNP? If not we're a Indel/deletion
 *
 * @return true if we're a SNP
 */
public boolean isSNP() {
assertNotNull();
return mCurrentRecord.isSNP();
}
/**
 * are we a novel site? Is there a DBSNP identifier
 * or a hapmap entry for the site?
 *
 * @return true if this site is novel
 */
public boolean isNovel() {
assertNotNull();
return mCurrentRecord.isNovel();
}
/**
 * are we an insertion?
 *
 * @return true if we are, false otherwise
 */
public boolean isInsertion() {
assertNotNull();
return mCurrentRecord.isInsertion();
}
/**
 * are we a deletion?
 *
 * @return true if we are, false otherwise
 */
public boolean isDeletion() {
assertNotNull();
return mCurrentRecord.isDeletion();
}
@Override
public GenomeLoc getLocation() {
assertNotNull();
return mCurrentRecord.getLocation();
}
/**
 * get the reference base(s) at this position
 *
 * @return the reference base or bases, as a string
 */
public String getReference() {
assertNotNull();
return mCurrentRecord.getReference();
}
/** are we bi-allelic?
 * @return true if we're bi-allelic
 */
public boolean isBiallelic() {
assertNotNull();
return mCurrentRecord.isBiallelic();
}
/**
 * get the -1 * (log 10 of the error value)
 *
 * @return the log based error estimate
 */
public double getNegLog10PError() {
assertNotNull();
return mCurrentRecord.getNegLog10PError();
}
/** @return the QUAL field of the current record */
public double getQual() {
assertNotNull();
return mCurrentRecord.getQual();
}
/** @return true if the current record has at least one alternate allele */
public boolean hasAlternateAllele() {
assertNotNull();
return mCurrentRecord.hasAlternateAllele();
}
/**
 * gets the alternate alleles. This method should return all the alleles present at the location,
 * NOT including the reference base. This is returned as a string list with no guarantee ordering
 * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
 * frequency).
 *
 * @return an alternate allele list
 */
public List<String> getAlternateAlleleList() {
assertNotNull();
return mCurrentRecord.getAlternateAlleleList();
}
/**
 * gets the alleles. This method should return all the alleles present at the location,
 * including the reference base. The first allele should always be the reference allele, followed
 * by an unordered list of alternate alleles.
 *
 * @return an alternate allele list
 */
public List<String> getAlleleList() {
assertNotNull();
return mCurrentRecord.getAlleleList();
}
/**
 * are we truly a variant, given a reference
 *
 * @return false if we're a variant (indel, deletion, SNP, etc), true if we're not
 */
public boolean isReference() {
assertNotNull();
return mCurrentRecord.isReference();
}
/**
 * are we an insertion or a deletion? yes, then return true. No? Well, false then.
 *
 * @return true if we're an insertion or deletion
 */
public boolean isIndel() {
assertNotNull();
return mCurrentRecord.isIndel();
}
/**
 * gets the alternate base in the case of a SNP. Throws an IllegalStateException if we're not a SNP
 *
 * @return a char, representing the alternate base
 */
public char getAlternativeBaseForSNP() {
assertNotNull();
return mCurrentRecord.getAlternativeBaseForSNP();
}
/**
 * gets the reference base in the case of a SNP. Throws an IllegalStateException if we're not a SNP
 *
 * @return a char, representing the reference base
 */
public char getReferenceForSNP() {
assertNotNull();
return mCurrentRecord.getReferenceForSNP();
}
/** @return true if the current record carries per-sample genotype columns */
public boolean hasGenotypeData() {
assertNotNull();
return mCurrentRecord.hasGenotypeData();
}
/**
 * get the genotypes
 *
 * @return a list of the genotypes
 */
public List<VCFGenotypeRecord> getVCFGenotypeRecords() {
assertNotNull();
return mCurrentRecord.getVCFGenotypeRecords();
}
/**
 * Returns the genotype associated with sample, or null if the genotype is missing
 *
 * @param sampleName the name of the sample genotype to fetch
 * @return genotype record
 */
public VCFGenotypeRecord getGenotype(final String sampleName) {
// NOTE(review): unlike the other accessors this does not call assertNotNull()
// first, so a missing current record yields an NPE rather than
// UnsupportedOperationException — confirm whether that is intended
return mCurrentRecord.getGenotype(sampleName);
}
/** @return the sample names declared in the current record */
public String[] getSampleNames() {
assertNotNull();
return mCurrentRecord.getSampleNames();
}
// public Map<String, Genotype> getSampleGenotypes() {
// String[] samples = getSampleNames();
// List<Genotype> genotypes = getGenotypes();
// HashMap<String, Genotype> map = new HashMap<String, Genotype>();
//
// for ( int i = 0; i < samples.length; i++ ) {
// map.put(samples[i], genotypes.get(i));
// }
//
// return map;
// }
/** @return the INFO field key/value pairs of the current record */
public Map<String, String> getInfoValues() {
assertNotNull();
return mCurrentRecord.getInfoValues();
}
/** @return the individual FILTER codes applied to the current record */
public String[] getFilteringCodes() {
assertNotNull();
return mCurrentRecord.getFilteringCodes();
}
/** @return true if the current record failed one or more filters */
public boolean isFiltered() {
assertNotNull();
return mCurrentRecord.isFiltered();
}
// public boolean hasFilteringCodes() {
// assertNotNull();
// return mCurrentRecord.hasFilteringCodes();
// }
/** @return the FILTER field of the current record as a single string */
public String getFilterString() {
assertNotNull();
return mCurrentRecord.getFilterString();
}
/** @return the header of the underlying VCF file */
public VCFHeader getHeader() {
return mReader.getHeader();
}
public boolean hasNext() {
return mReader.hasNext();
}
/**
 * Advances the underlying reader, verifying that the file is sorted
 * (records must not move backwards; ties at the same location are allowed).
 *
 * @return the next element in the iteration.
 * @throws NoSuchElementException - iterator has no more elements.
 */
public RodVCF next() {
if (!this.hasNext()) throw new NoSuchElementException("RodVCF next called on iterator with no more elements");
// get the next record
VCFRecord rec = mReader.next();
// make sure the next VCF record isn't before the current record (we'll accept at the same location, the
// spec doesn't indicate, and it seems like a valid use case)
GenomeLoc curPosition = null;
if (mCurrentRecord != null) curPosition = mCurrentRecord.getLocation();
if (curPosition != null && rec != null && curPosition.compareTo(rec.getLocation()) > 0)
throw new StingException("The next VCF record appears to be before the current (current location => " + curPosition.toString() +
", the next record position => " + rec.getLocation().toString() + " with line : " + rec.toStringEncoding(mReader.getHeader()) + "). " +
"Check to make sure the input VCF file is correctly sorted.");
// save off the previous record. This is needed given how iterators are used in the ROD system;
// we need to save off the last record
mCurrentRecord = rec;
return new RodVCF(name, rec, mReader);
}
public void remove() {
// mutation of the underlying file is never supported
throw new UnsupportedOperationException("The remove operation is not supported for a VCF rod");
}
}

View File

@ -4,6 +4,7 @@ import edu.mit.broad.picard.genotype.DiploidGenotype;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.gelitext.GeliTextFeature;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
@ -16,7 +17,6 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.*;
@ -46,7 +46,6 @@ public class VariantContextAdaptors {
static {
adaptors.put(DbSNPFeature.class, new DBSnpAdaptor());
adaptors.put(RodVCF.class, new RodVCFAdaptor());
adaptors.put(VCFRecord.class, new VCFRecordAdaptor());
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
adaptors.put(HapMapROD.class, new HapMapAdaptor());
@ -132,23 +131,6 @@ public class VariantContextAdaptors {
}
}
// --------------------------------------------------------------------------------------------------------------
//
// VCF to VariantContext and back again
//
// --------------------------------------------------------------------------------------------------------------
private static class RodVCFAdaptor extends VCAdaptor {
// WARNING: do not use this method if you have anything other than point mutations in your VCF
VariantContext convert(String name, Object input) {
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), null);
}
VariantContext convert(String name, Object input, ReferenceContext ref) {
return vcfToVariantContext(name, ((RodVCF)input).getRecord(), ref);
}
}
private static class VCFRecordAdaptor extends VCAdaptor {
// WARNING: do not use this method if you have anything other than point mutations in your VCF
VariantContext convert(String name, Object input) {
@ -229,7 +211,7 @@ public class VariantContextAdaptors {
double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError();
GenomeLoc loc = vcf.getLocation();
GenomeLoc loc = GenomeLocParser.createGenomeLoc(vcf.getChr(),vcf.getStart());
if ( vcf.isDeletion() )
loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1);

View File

@ -66,7 +66,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
Types.put("HapMap", HapMapROD.class);
Types.put("Intervals", IntervalRod.class);
Types.put("GLF", RodGLF.class);
Types.put("VCF", RodVCF.class);
Types.put("PicardDbSNP", rodPicardDbSNP.class);
Types.put("HapmapVCF", HapmapVCFROD.class);
Types.put("Beagle", BeagleROD.class);

View File

@ -67,8 +67,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
public Map<String, Class> getAvailableTrackNamesAndTypes() {
Map<String, Class> classes = new HashMap<String, Class>();
for (String c : this.pluginsByName.keySet())
// we're excluding VCF right now
if (!c.contains("VCF")) classes.put(c,this.pluginsByName.get(c));
classes.put(c,this.pluginsByName.get(c));
return classes;
}
@ -88,15 +87,13 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
// make a feature reader
FeatureReader reader;
try {
FeatureCodec codec = this.createByType(targetClass);
// check to see if the input file has an index
if (!(new File(inputFile.getAbsolutePath() + linearIndexExtension).canRead())) {
LinearIndex index = createIndex(inputFile, codec);
reader = new FeatureReader(inputFile,index, codec);
LinearIndex index = createIndex(inputFile, this.createByType(targetClass));
reader = new FeatureReader(inputFile,index, this.createByType(targetClass));
}
else {
reader = new FeatureReader(inputFile,codec);
reader = new FeatureReader(inputFile,this.createByType(targetClass));
}
} catch (FileNotFoundException e) {
throw new StingException("Unable to create reader with file " + inputFile, e);

View File

@ -26,6 +26,10 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
@ -95,8 +99,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
throw new IllegalStateException("VCF record was created, but no rod data is present");
Object rod = rods.get(0);
if ( rod instanceof RodVCF )
samples.addAll(Arrays.asList(((RodVCF)rod).getSampleNames()));
if ( rod instanceof VCFRecord )
samples.addAll(Arrays.asList(((VCFRecord)rod).getSampleNames()));
else if ( rod instanceof HapMapROD )
samples.addAll(Arrays.asList(((HapMapROD)rod).getSampleIDs()));
else

View File

@ -1,12 +1,12 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.TabularROD;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.HashMap;
import java.util.Map;

View File

@ -25,13 +25,13 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.List;

View File

@ -1,12 +1,12 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -1,12 +1,12 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -36,7 +37,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.*;
import net.sf.samtools.SAMRecord;

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
import org.broadinstitute.sting.utils.QualityUtils;

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
@ -7,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
@ -7,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.List;

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
@ -8,7 +9,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;

View File

@ -1,15 +1,14 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.ArrayList;
import java.util.HashMap;

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
@ -9,8 +11,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnota
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.ArrayList;

View File

@ -25,11 +25,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
@ -7,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;
import java.util.HashMap;

View File

@ -25,12 +25,14 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType;
@ -41,8 +43,6 @@ import org.broadinstitute.sting.utils.classloader.PackageUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
@ -184,7 +184,7 @@ public class VariantAnnotator extends LocusWalker<Integer, Integer> {
}
}
if ( variant instanceof RodVCF ) {
if ( variant instanceof VCFRecord) {
for(VariantContext annotatedVC : annotatedVCs ) {
vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
}

View File

@ -26,6 +26,9 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
@ -41,9 +44,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.playground.gatk.walkers.annotator.GenomicAnnotation;
import org.broadinstitute.sting.utils.classloader.PackageUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import java.util.*;

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broad.tribble.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.utils.genotype.vcf.VCFFormatHeaderLine;
import java.util.Map;

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.Map;

View File

@ -25,10 +25,13 @@
package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.classloader.PackageUtils;
@ -157,12 +160,14 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
return 0;
// get all of the vcf rods at this locus
ArrayList<RodVCF> vcfRods = new ArrayList<RodVCF>();
Map<VCFRecord,String> vcfRods = new LinkedHashMap<VCFRecord,String>();
Iterator<GATKFeature> rods = rodData.getAllRods().iterator();
while (rods.hasNext()) {
GATKFeature rod = rods.next();
if ( rod.getUnderlyingObject() instanceof RodVCF )
vcfRods.add((RodVCF)rod.getUnderlyingObject());
if ( rod.getUnderlyingObject() instanceof VCFRecord ) {
if (vcfRods.containsKey(rod)) throw new StingException("Duplicate VCF's found");
vcfRods.put((VCFRecord)rod.getUnderlyingObject(),rod.getName());
}
}
if ( vcfRods.size() == 0 )
@ -171,12 +176,12 @@ public class CallsetConcordanceWalker extends RodWalker<Integer, Integer> {
// pull out all of the individual calls from the rods and insert into a map based on the
// mapping from rod/sample to uniquified name
HashMap<String, VCFGenotypeRecord> samplesToRecords = new HashMap<String, VCFGenotypeRecord>();
for ( RodVCF rod : vcfRods ) {
for ( VCFRecord rod : vcfRods.keySet() ) {
List<VCFGenotypeRecord> records = rod.getVCFGenotypeRecords();
for ( VCFGenotypeRecord vcfRec : records ) {
String uniquifiedSample = rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), vcfRec.getSampleName()));
String uniquifiedSample = rodNamesToSampleNames.get(new Pair<String, String>(vcfRods.get(rod), vcfRec.getSampleName()));
if ( uniquifiedSample == null )
throw new StingException("Unexpected sample encountered: " + vcfRec.getSampleName() + " in rod " + rod.getName());
throw new StingException("Unexpected sample encountered: " + vcfRec.getSampleName() + " in rod " + vcfRods.get(rod));
samplesToRecords.put(uniquifiedSample, vcfRec);
}

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import java.util.Map;
import java.util.Set;

View File

@ -1,11 +1,11 @@
package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import java.util.*;
@ -103,5 +103,5 @@ public class IndelSubsets implements ConcordanceType {
}
public String getInfoName() { return "IndelSubsets"; }
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "Indel-related subsets"); }
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "Indel-related subsets"); }
}

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import java.util.*;
import java.util.Map.Entry;

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import java.util.*;
@ -116,5 +116,5 @@ public class SNPGenotypeConcordance implements ConcordanceType {
}
public String getInfoName() { return "SnpConcordance"; }
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "SNP concordance test"); }
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "SNP concordance test"); }
}

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.concordance;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.StingException;
import java.util.*;
@ -64,5 +64,5 @@ public class SimpleVenn implements ConcordanceType {
}
public String getInfoName() { return "Venn"; }
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "2-way Venn split"); }
public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "2-way Venn split"); }
}

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.filters;
import org.apache.commons.jexl.Expression;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.util.*;

View File

@ -25,11 +25,11 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;

View File

@ -25,6 +25,10 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.sequenom;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
@ -38,6 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.util.*;

View File

@ -1,13 +1,13 @@
package org.broadinstitute.sting.oneoffprojects.refdata;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.refdata.BasicReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import java.io.File;
import java.io.FileNotFoundException;
@ -23,14 +23,15 @@ public class HapmapVCFROD extends BasicReferenceOrderedDatum implements Iterator
// This is a (hopefully temporary) wrapper class for certain VCF files that we want to protect from
// utilities that grab genotypes or sample names across all VCF files
private RodVCF rod;
private VCFRecord record;
private VCFReader reader;
public VCFReader getReader() {
return rod.getReader();
return reader;
}
public VCFRecord getRecord() {
return rod.getRecord();
return record;
}
public HapmapVCFROD(String name) {
@ -39,85 +40,77 @@ public class HapmapVCFROD extends BasicReferenceOrderedDatum implements Iterator
public HapmapVCFROD(String name, VCFRecord currentRecord, VCFReader reader) {
super(name);
rod = new RodVCF(name,currentRecord,reader);
}
public HapmapVCFROD(String name, RodVCF rod) {
super(name);
this.rod = rod;
this.record = currentRecord;
this.reader = reader;
}
public Object initialize(final File source) throws FileNotFoundException {
rod = new RodVCF(name);
rod.initialize(source);
return rod.getHeader();
reader = new VCFReader(source);
if (reader.hasNext()) record = reader.next();
return reader.getHeader();
}
public boolean parseLine(Object obj, String[] args) {
try {
return rod.parseLine(obj,args);
} catch (Exception e) {
throw new UnsupportedOperationException("Parse line not supported",e);
}
return false;
}
public double getNegLog10PError() {
return rod.getNegLog10PError();
return record.getNegLog10PError();
}
public String getReference() {
return rod.getReference();
return record.getReference();
}
public String toString() {
return rod.toString();
return record.toString();
}
public List<String> getAlternateAlleleList() {
return rod.getAlternateAlleleList();
return record.getAlternateAlleleList();
}
public boolean isDeletion() {
return rod.isDeletion();
return record.isDeletion();
}
public GenomeLoc getLocation() {
return rod.getLocation();
return GenomeLocParser.createGenomeLoc(record.getChr(),record.getStart());
}
public boolean isBiallelic() {
return rod.isBiallelic();
return record.isBiallelic();
}
public boolean isIndel() {
return rod.isIndel();
return record.isIndel();
}
public boolean isSNP() {
return rod.isSNP();
return record.isSNP();
}
public boolean isReference() {
return rod.isReference();
return record.isReference();
}
public double getNonRefAlleleFrequency() {
return rod.getNonRefAlleleFrequency();
return record.getNonRefAlleleFrequency();
}
public char getAlternativeBaseForSNP() {
return rod.getAlternativeBaseForSNP();
return record.getAlternativeBaseForSNP();
}
public boolean isInsertion() {
return rod.isInsertion();
return record.isInsertion();
}
public List<String> getAlleleList() {
return rod.getAlleleList();
return record.getAlleleList();
}
public char getReferenceForSNP() {
return rod.getReferenceForSNP();
return record.getReferenceForSNP();
}
public boolean hasGenotype(DiploidGenotype g) {
@ -126,29 +119,29 @@ public class HapmapVCFROD extends BasicReferenceOrderedDatum implements Iterator
}
public VCFHeader getHeader() {
return rod.getHeader();
return record.getHeader();
}
public boolean hasNext() {
return rod.hasNext();
return reader.hasNext();
}
public HapmapVCFROD next() {
return new HapmapVCFROD(name,rod.next());
return new HapmapVCFROD(name, record, reader);
}
public void remove() {
rod.remove();
throw new UnsupportedOperationException("Unable to remove");
}
public static HapmapVCFROD createIterator(String name, File file) {
RodVCF vcf = new RodVCF(name);
HapmapVCFROD vcf = new HapmapVCFROD(name);
try {
vcf.initialize(file);
} catch (FileNotFoundException e) {
throw new StingException("Unable to find file " + file);
throw new StingException("Unable to load file",e);
}
return new HapmapVCFROD(name,vcf);
return vcf;
}
}

View File

@ -1,16 +1,16 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import java.util.*;
@ -22,7 +22,7 @@ import java.util.*;
* Time: 3:25:11 PM
* To change this template use File | Settings | File Templates.
*/
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="variants",type= RodVCF.class)})
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="variants",type= VCFCodec.class)})
public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>, Map<String,Set<Double>>> {
@ -47,12 +47,11 @@ public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>
}
public Map<String,Double> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
RodVCF vcfRod = tracker.lookup("variants",RodVCF.class);
VCFRecord record = tracker.lookup("variants", VCFRecord.class);
if ( vcfRod == null ) {
if ( record == null ) {
return null;
}
VCFRecord record = vcfRod.getRecord();
return getAlleleBalanceBySample(record,ref,context);
}

View File

@ -1,15 +1,16 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.*;
@ -93,12 +94,12 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{
Object variant = rods.get(0);
if ( variant instanceof RodVCF ) {
if ( variant instanceof VCFRecord) {
RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus()));
String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList));
annotationString = annotationString.split("\\s+")[0];
((RodVCF) variant).getRecord().addInfoField("type",annotationString);
vcfWriter.addRecord(((RodVCF) variant).getRecord());
((VCFRecord) variant).addInfoField("type",annotationString);
vcfWriter.addRecord((VCFRecord) variant);
} else {
throw new StingException("This one-off walker only deals with VCF files.");
}

View File

@ -1,11 +1,12 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
@ -13,8 +14,6 @@ import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;

View File

@ -1,12 +1,11 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.collections.Pair;
@ -42,14 +41,13 @@ public class VCFReferenceFixerWalker extends RodWalker<VCFRecord,Long> {
}
List<Object> rods = tracker.getReferenceMetaData("fixme");
Object rod = rods.get(0);
RodVCF vcfrod = null;
if ( rod instanceof RodVCF ) {
vcfrod = (RodVCF) rod;
VCFRecord vcfrod = null;
if ( rod instanceof VCFRecord ) {
vcfrod = (VCFRecord) rod;
}
VCFRecord rec = vcfrod.getRecord();
rec.setReferenceBase(new String(BaseUtils.charSeq2byteSeq(context.getBases())));
return rec;
if (vcfrod != null) vcfrod.setReferenceBase(new String(BaseUtils.charSeq2byteSeq(context.getBases())));
return vcfrod;
/*
VariantContext vcon = null;

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -25,9 +25,9 @@
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -25,11 +25,11 @@
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;

View File

@ -25,12 +25,12 @@
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -1,12 +1,12 @@
package org.broadinstitute.sting.oneoffprojects.walkers.annotator;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
import java.util.HashMap;
import java.util.Map;

View File

@ -1,9 +1,10 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Arrays;
import java.util.HashSet;
@ -76,9 +77,9 @@ class LocusConcordanceInfo {
public GenomeLoc getLoc() {
if ( concordanceType == ConcordanceType.TRUTH_SET || concordanceType == ConcordanceType.BOTH_SETS || concordanceType == ConcordanceType.TRUTH_SET_VARIANT_FILTERED) {
return truthVCFRecord.getLocation();
return GenomeLocParser.createGenomeLoc(truthVCFRecord.getChr(),truthVCFRecord.getStart());
} else {
return variantVCFRecord.getLocation();
return GenomeLocParser.createGenomeLoc( variantVCFRecord.getChr(),variantVCFRecord.getStart());
}
}

View File

@ -25,10 +25,11 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
@ -41,7 +42,7 @@ import org.broadinstitute.sting.commandline.Argument;
* a VCF binding with the name 'variants'.
* @Author: Chris Hartl
*/
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)})
@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= VCFCodec.class),@RMD(name="variants",type= VCFRecord.class)})
public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > {
@Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+
"DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1;
@ -65,14 +66,14 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
if ( tracker == null ) {
return null;
}
RodVCF variantData = tracker.lookup("variants",RodVCF.class);
VCFRecord variantData = tracker.lookup("variants", VCFRecord.class);
if ( ignoreKnownSites ) { // ignoreKnownSites && tracker.lookup("variants",null) != null && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) )
if ( variantData != null && ! variantData.isNovel() ) {
//logger.info("Not novel: "+( (RodVCF) tracker.lookup("variants",null)).getID());
return null;
}
}
RodVCF truthData = tracker.lookup("truth",RodVCF.class);
VCFRecord truthData = tracker.lookup("truth",VCFRecord.class);
LocusConcordanceInfo concordance;
if ( truthData == null && variantData == null) {
@ -82,33 +83,33 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
} else if ( truthData == null ) {
// not in the truth set
if ( ( (RodVCF) variantData ).isFiltered() ) {
if ( variantData.isFiltered() ) {
concordance = null;
} else {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref);
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null,variantData,ref);
}
} else if ( variantData == null ) {
// not in the variant set
if ( ( (RodVCF) truthData).isFiltered() ) {
if ( (truthData).isFiltered() ) {
concordance = null;
} else {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,( (RodVCF) truthData).getRecord(),null,ref);
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,truthData,null,ref);
}
} else {
// in both
// check for filtering
boolean truth_filter = ((RodVCF) truthData).isFiltered();
boolean call_filter = ((RodVCF) variantData).isFiltered();
boolean truth_filter = truthData.isFiltered();
boolean call_filter = variantData.isFiltered();
if ( truth_filter && call_filter ) {
@ -116,15 +117,15 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf
} else if ( truth_filter ) {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref);
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null,variantData,ref);
} else if ( call_filter ) {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,( (RodVCF) truthData).getRecord(), null ,ref);
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,truthData, null ,ref);
} else {
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,( (RodVCF) truthData).getRecord(),( (RodVCF) variantData).getRecord(),ref);
concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,truthData,variantData,ref);
}
}

View File

@ -1,12 +1,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
/**
* Created by IntelliJ IDEA.

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers.vcftools;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
@ -77,7 +78,7 @@ public class BeagleTrioToVCFWalker extends RodWalker<VariantContext, Long> {
if ( vc != null ) {
if ( ! headerWritten ) {
RodVCF vcfrod = tracker.lookup(TRACK_NAME,RodVCF.class);
VCFRecord vcfrod = tracker.lookup(TRACK_NAME,VCFRecord.class);
writer.writeHeader(vcfrod.getHeader());
headerWritten = true;
}

View File

@ -25,10 +25,13 @@
package org.broadinstitute.sting.oneoffprojects.walkers.vcftools;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.*;
@ -70,8 +73,8 @@ public class SimpleVCFIntersectWalker extends RodWalker<VCFRecordMerger,Long>{
return null;
}
RodVCF priorityCall = tracker.lookup("priority",RodVCF.class);
RodVCF otherCall = tracker.lookup("other",RodVCF.class);
VCFRecord priorityCall = tracker.lookup("priority",VCFRecord.class);
VCFRecord otherCall = tracker.lookup("other",VCFRecord.class);
if ( priorityCall == null && otherCall == null ) {
return null;
@ -82,9 +85,9 @@ public class SimpleVCFIntersectWalker extends RodWalker<VCFRecordMerger,Long>{
if ( ! union ) {
return null;
} else if ( merger.hasBeenInstantiated() ) {
merger.update(null,otherCall.getRecord());
merger.update(null,otherCall);
} else {
merger.hold(otherCall.getRecord());
merger.hold(otherCall);
}
} else if ( otherCall == null ) {
@ -92,17 +95,17 @@ public class SimpleVCFIntersectWalker extends RodWalker<VCFRecordMerger,Long>{
if ( ! union ) {
return null;
} else if ( merger.hasBeenInstantiated() ) {
merger.update(priorityCall.getRecord(),null);
merger.update(priorityCall,null);
} else {
merger.hold(priorityCall.getRecord());
merger.hold(priorityCall);
}
} else {
if ( merger.hasBeenInstantiated() ) {
merger.update(priorityCall.getRecord(), otherCall.getRecord());
merger.update(priorityCall, otherCall);
} else {
merger = instantiateMerger(priorityCall.getRecord(),otherCall.getRecord());
merger = instantiateMerger(priorityCall,otherCall);
}
}
@ -255,8 +258,8 @@ class VCFRecordMerger {
private HashMap<VCFHeader.HEADER_FIELDS,String> getFields(VCFRecord record1, VCFRecord record2) {
HashMap<VCFHeader.HEADER_FIELDS,String> fields = new HashMap<VCFHeader.HEADER_FIELDS,String>();
fields.put(VCFHeader.HEADER_FIELDS.CHROM, record1.getLocation().getContig());
fields.put(VCFHeader.HEADER_FIELDS.POS, String.format("%d",record1.getLocation().getStart()));
fields.put(VCFHeader.HEADER_FIELDS.CHROM, record1.getChr());
fields.put(VCFHeader.HEADER_FIELDS.POS, String.format("%d",record1.getStart()));
fields.put(VCFHeader.HEADER_FIELDS.ID, record1.getID());
fields.put(VCFHeader.HEADER_FIELDS.ALT, listAsString(record1.getAlternateAlleleList()));
fields.put(VCFHeader.HEADER_FIELDS.REF, record1.getReference());

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
@ -42,7 +43,6 @@ import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator;

View File

@ -8,6 +8,7 @@ import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
@ -21,7 +22,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
/**
* This plugin for {@link VariantAnnotatorEngine} serves as the core
@ -73,8 +73,8 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
final Map<String, Object> annotations = new HashMap<String, Object>();
for(final GATKFeature gatkFeature : tracker.getAllRods())
{
final ReferenceOrderedDatum rod = (ReferenceOrderedDatum) gatkFeature.getUnderlyingObject();
final String name = rod.getName();
final Object rod = gatkFeature.getUnderlyingObject();
final String name = gatkFeature.getName();
if( name.equals("variant") || name.equals("interval") ) {
continue;
}

View File

@ -35,13 +35,15 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
@ -50,8 +52,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
@ -183,7 +183,7 @@ public class GenomicAnnotator extends RodWalker<Integer, Integer> {
}
}
if ( variant instanceof RodVCF ) { //TODO is this if-statement necessary?
if ( variant instanceof VCFRecord) { //TODO is this if-statement necessary?
for(VariantContext annotatedVC : annotatedVCs ) {
vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase()));
}

View File

@ -25,11 +25,12 @@
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -40,7 +41,7 @@ import org.broadinstitute.sting.commandline.Argument;
* Computes the density of SNPs passing and failing filters in intervals on the genome and emits a table for display
*/
@By(DataSource.REFERENCE)
@Requires(value={},referenceMetaData=@RMD(name="eval",type=RodVCF.class))
@Requires(value={},referenceMetaData=@RMD(name="eval",type= VCFCodec.class))
public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDensity.Counter> {
@Argument(fullName="granularity", shortName="granularity", doc="", required=false)
private int granularity = 1000000;
@ -64,7 +65,7 @@ public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDe
public Pair<VariantContext, GenomeLoc> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
VariantContext vc = null;
RodVCF vcf = tracker.lookup("eval",RodVCF.class);
VCFRecord vcf = tracker.lookup("eval",VCFRecord.class);
if (vcf != null)
vc = VariantContextAdaptors.toVariantContext("eval", vcf);
return new Pair<VariantContext, GenomeLoc>(vc, context.getLocation());

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import java.util.*;
import java.io.IOException;

View File

@ -25,19 +25,21 @@
package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.io.File;
import java.io.IOException;
@ -128,7 +130,7 @@ public class ApplyVariantClustersWalker extends RodWalker<ExpandingArrayList<Var
List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getType().equals(RodVCF.class) ) {
if ( rod.getType().equals(VCFCodec.class) ) {
VCFReader reader = new VCFReader(rod.getFile());
Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
samples.addAll(vcfSamples);

View File

@ -25,12 +25,12 @@
package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.collections.Pair;
@ -83,11 +83,11 @@ public class VariantConcordanceROCCurveWalker extends RodWalker<ExpandingArrayLi
for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) {
if( rod != null && !rod.getName().toUpperCase().startsWith("TRUTH") ) {
if( rod.getReferenceOrderedData().getIterator().hasNext() && rod.getReferenceOrderedData().getIterator().next().getUnderlyingObject() instanceof RodVCF ) {
if( rod.getReferenceOrderedData().getIterator().hasNext() && rod.getReferenceOrderedData().getIterator().next().getUnderlyingObject() instanceof VCFRecord ) {
inputRodNames.add(rod.getName());
System.out.println("Adding " + rod.getName() + " to input RodVCF list.");
if( sampleName == null && !multiSampleCalls ) {
final String[] samples = ((RodVCF)rod.getReferenceOrderedData().getIterator().next().getUnderlyingObject()).getSampleNames();
final String[] samples = ((VCFRecord)rod.getReferenceOrderedData().getIterator().next().getUnderlyingObject()).getSampleNames();
if( samples.length > 1 ) {
multiSampleCalls = true;

View File

@ -25,10 +25,12 @@
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.Requires;
@ -136,12 +138,13 @@ public class VCFCombine extends RodWalker<VCFRecord, VCFWriter> {
return null;
// get all of the vcf rods at this locus
ArrayList<RodVCF> vcfRods = new ArrayList<RodVCF>();
Map<VCFRecord, String> vcfRods = new LinkedHashMap<VCFRecord,String>();
Iterator<GATKFeature> rods = tracker.getAllRods().iterator();
while (rods.hasNext()) {
Object rod = rods.next().getUnderlyingObject();
if ( rod instanceof RodVCF )
vcfRods.add((RodVCF)rod);
GATKFeature feat = rods.next();
Object rod = feat.getUnderlyingObject();
if ( rod instanceof VCFRecord )
vcfRods.put((VCFRecord)rod,feat.getName());
}
if ( vcfRods.size() == 0 )
@ -166,18 +169,20 @@ public class VCFCombine extends RodWalker<VCFRecord, VCFWriter> {
return vcfWriter;
}
private VCFRecord vcfUnion(ArrayList<RodVCF> rods) {
private VCFRecord vcfUnion(Map<VCFRecord, String> rods) {
if ( priority == null )
return rods.get(0).mCurrentRecord;
return rods.keySet().iterator().next();
if ( annotateUnion ) {
Map<String, RodVCF> rodMap = new HashMap<String, RodVCF>();
for ( RodVCF vcf : rods ) { rodMap.put(vcf.getName(), vcf); }
Map<String, VCFRecord> rodMap = new HashMap<String, VCFRecord>();
for ( VCFRecord vcf : rods.keySet() ) {
rodMap.put(rods.get(vcf),vcf);
}
String priority1 = priority[0];
String priority2 = priority[1];
VCFRecord vcf1 = rodMap.containsKey(priority1) ? rodMap.get(priority1).getRecord() : null;
VCFRecord vcf2 = rodMap.containsKey(priority2) ? rodMap.get(priority2).getRecord() : null;
VCFRecord vcf1 = rodMap.containsKey(priority1) ? rodMap.get(priority1) : null;
VCFRecord vcf2 = rodMap.containsKey(priority2) ? rodMap.get(priority2) : null;
// for simplicity, we are setting set and call for vcf1
String set = priority1;
@ -213,9 +218,9 @@ public class VCFCombine extends RodWalker<VCFRecord, VCFWriter> {
return call;
} else {
for ( String rodname : priority ) {
for ( RodVCF rod : rods ) {
if ( rod.getName().equals(rodname) )
return rod.mCurrentRecord;
for ( VCFRecord rod : rods.keySet() ) {
if ( rods.get(rod).equals(rodname) )
return rod;
}
}
}

View File

@ -29,10 +29,10 @@ import org.apache.commons.jexl.Expression;
import org.apache.commons.jexl.ExpressionFactory;
import org.apache.commons.jexl.JexlContext;
import org.apache.commons.jexl.JexlHelper;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
@ -45,7 +45,7 @@ import java.util.*;
/**
* Selects variant calls for output from a user-supplied VCF file using a number of user-selectable, parameterizable criteria.
*/
@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class))
@Requires(value={},referenceMetaData=@RMD(name="variant",type= VCFCodec.class))
public class VCFSelectWalker extends RodWalker<Integer, Integer> {
@Argument(fullName="match", shortName="match", doc="Expression used with INFO fields to select VCF records for inclusion in the output VCF(see wiki docs for more info)", required=false)
protected String[] MATCH_STRINGS = new String[]{null};
@ -66,7 +66,7 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
private List<MatchExp> matchExpressions = new ArrayList<MatchExp>();
private void initializeVcfWriter(RodVCF rod) {
private void initializeVcfWriter(VCFRecord record) {
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
@ -78,7 +78,7 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
}
writer = new VCFWriter(out);
writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()));
writer.writeHeader(new VCFHeader(hInfo, record.getHeader().getGenotypeSamples()));
}
public void initialize() {
@ -108,15 +108,15 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
RodVCF variant = tracker.lookup("variant",RodVCF.class);
VCFRecord variant = tracker.lookup("variant",VCFRecord.class);
// ignore places where we don't have a variant
if ( variant == null )
return 0;
boolean someoneMatched = false;
for ( MatchExp exp : matchExpressions ) {
Map<String, String> infoMap = new HashMap<String, String>(variant.mCurrentRecord.getInfoValues());
infoMap.put("QUAL", String.valueOf(variant.mCurrentRecord.getQual()));
Map<String, String> infoMap = new HashMap<String, String>(variant.getInfoValues());
infoMap.put("QUAL", String.valueOf(variant.getQual()));
JexlContext jContext = JexlHelper.createContext();
jContext.setVars(infoMap);
@ -139,13 +139,11 @@ public class VCFSelectWalker extends RodWalker<Integer, Integer> {
return 1;
}
private void writeVCF(RodVCF variant) {
VCFRecord rec = variant.mCurrentRecord;
private void writeVCF(VCFRecord variant) {
if ( writer == null )
initializeVcfWriter(variant);
writer.addRecord(rec);
writer.addRecord(variant);
}
public Integer reduce(Integer value, Integer sum) {

View File

@ -25,14 +25,15 @@
package org.broadinstitute.sting.playground.gatk.walkers.vcftools;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.io.File;
import java.util.ArrayList;
@ -80,9 +81,8 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
if (tracker != null) {
for (GATKFeature feature : tracker.getAllRods()) {
Object rod = feature.getUnderlyingObject();
if (rod instanceof RodVCF) {
RodVCF vcfrod = (RodVCF) rod;
VCFRecord record = vcfrod.mCurrentRecord;
if (rod instanceof VCFRecord) {
VCFRecord vcfrod = (VCFRecord) rod;
if (SAMPLES == null) {
SAMPLES = new HashSet<String>();
@ -95,7 +95,7 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
//out.println(record.toStringEncoding(vcfrod.getHeader()));
records.add(record);
records.add(vcfrod);
}
}
}
@ -129,8 +129,8 @@ public class VCFSubsetWalker extends RodWalker<ArrayList<VCFRecord>, VCFWriter>
}
VCFRecord subset = new VCFRecord(record.getReference(),
record.getLocation().getContig(),
(int) record.getLocation().getStart(),
record.getChr(),
record.getStart(),
record.getID(),
genotypeEncodings,
record.getQual(),

View File

@ -27,6 +27,8 @@ package org.broadinstitute.sting.playground.tools.vcf;
//import org.broadinstitute.sting.playground.tools.vcf.VCFOptimize.Cut;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;
@ -152,7 +154,7 @@ class VCFApplyCuts extends CommandLineProgram
VCFReader reader = null;
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
else { reader = new VCFReader(new File(filename)); }
VCFHeader header = reader.getHeader();

View File

@ -24,15 +24,18 @@
*/
package org.broadinstitute.sting.playground.tools.vcf;
import org.broad.tribble.vcf.VCFGenotypeEncoding;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.*;
import java.util.*;
import net.sf.picard.util.Interval;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
class VCFCallRates extends CommandLineProgram
@ -62,7 +65,7 @@ class VCFCallRates extends CommandLineProgram
if (autocorrect)
{
reader = new VCFReader(VCFHomogenizer.create(filename));
reader = new VCFReader(new File(filename),new VCFHomogenizer());
}
else
{

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.playground.tools.vcf;
import org.broad.tribble.vcf.VCFCodec;
import java.io.*;
import java.util.zip.*;
@ -34,93 +36,10 @@ import java.util.zip.*;
* @author jmaguire
*/
class VCFHomogenizer extends InputStream
{
private BufferedReader in = null;
private String currentLine;
private ByteArrayInputStream stream;
public VCFHomogenizer(Reader in)
{
this.in = (BufferedReader)in;
currentLine = this.readLine();
stream = new ByteArrayInputStream(currentLine.getBytes());
}
////////////////////////////////////////
// InputStream methods
public int available()
{
return 1;
}
public void close() throws java.io.IOException
{
this.in.close();
}
public void mark(int readlimit)
{
System.err.println("not implemented");
System.exit(-1);
}
public void reset()
{
System.err.println("not implemented");
System.exit(-1);
}
public boolean markSupported()
{
return false;
}
public int read()
{
if ((stream == null) || (stream.available() == 0))
{
currentLine = this.readLine();
if (currentLine == null) { return -1; }
stream = new ByteArrayInputStream(currentLine.getBytes());
}
return stream.read();
}
// END InputStream methods
/////////////////////////////////////
public static VCFHomogenizer create(String filename)
{
try
{
if (filename.endsWith(".gz"))
{
return new VCFHomogenizer(new BufferedReader(
new InputStreamReader(
new GZIPInputStream(
new FileInputStream(filename)))));
}
else
{
return new VCFHomogenizer(new BufferedReader(
new InputStreamReader(
new FileInputStream(filename))));
}
}
catch (Exception e)
{
e.printStackTrace();
System.exit(-1);
}
return null;
}
class VCFHomogenizer implements VCFCodec.LineTransform {
//my ($chr, $off, $id, $ref, $alt, $qual, $filter, $info, $format, @genotypes) = @tokens;
private String editLine(String input)
public String lineTransform(String input)
{
if (input == null) { return null; }
@ -228,47 +147,6 @@ class VCFHomogenizer extends InputStream
return output;
}
public String readLine()
{
try
{
String line = in.readLine();
if (line == null) { return null; }
else { return editLine(line + "\n"); }
}
catch (Exception e)
{
e.printStackTrace();
System.exit(-1);
}
return null;
}
/*
public int read()
{
throw new RuntimeException("VCFHomogenizer.read() not implemented.");
}
public byte read(byte[] b)
{
throw new RuntimeException("VCFHomogenizer.read(byte[]) not implemented.");
}
public int read(byte[] b, int off, int len)
{
throw new RuntimeException("VCFHomogenizer.read(byte[], int, int) not implemented.");
}
public long skip(long n)
{
throw new RuntimeException("VCFHomogenizer.skip(long) not implemented.");
}
public boolean markSupported() { return false; }
*/
}

View File

@ -24,6 +24,8 @@
*/
package org.broadinstitute.sting.playground.tools.vcf;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;
@ -63,8 +65,8 @@ class VCFMerge extends CommandLineProgram
if (autocorrect)
{
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
reader2 = new VCFReader(VCFHomogenizer.create(filename2));
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
}
else
{

View File

@ -24,10 +24,11 @@
*/
package org.broadinstitute.sting.playground.tools.vcf;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import java.io.*;
@ -306,7 +307,7 @@ class VCFOptimize extends CommandLineProgram
VCFReader reader = null;
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
else { reader = new VCFReader(new File(filename)); }
PrintWriter output = null;

View File

@ -24,14 +24,15 @@
*/
package org.broadinstitute.sting.playground.tools.vcf;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.*;
import net.sf.picard.util.Interval;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
class VCFSequenomAnalysis extends CommandLineProgram
@ -66,8 +67,8 @@ class VCFSequenomAnalysis extends CommandLineProgram
if (autocorrect)
{
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
reader2 = new VCFReader(VCFHomogenizer.create(filename2));
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
}
else
{

View File

@ -24,16 +24,19 @@
*/
package org.broadinstitute.sting.playground.tools.vcf;
import org.broad.tribble.vcf.VCFGenotypeEncoding;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.*;
import java.util.*;
import java.lang.*;
import net.sf.picard.util.Interval;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
class VCFSequenomAnalysis2 extends CommandLineProgram
@ -68,8 +71,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram
if (autocorrect)
{
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
reader2 = new VCFReader(VCFHomogenizer.create(filename2));
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
}
else
{

View File

@ -24,6 +24,10 @@
*/
package org.broadinstitute.sting.playground.tools.vcf;
import org.broad.tribble.vcf.VCFGenotypeEncoding;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Argument;
@ -65,7 +69,7 @@ class VCFValidate extends CommandLineProgram
VCFReader reader = null;
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
else { reader = new VCFReader(new File(filename)); }
VCFHeader header = reader.getHeader();
@ -123,8 +127,10 @@ class VCFStats extends CommandLineProgram
String stop = tokens[2];
Interval locus = new Interval(chr, Integer.parseInt(start), Integer.parseInt(stop));
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); }
else { reader = new VCFReader(new File(in_filename)); }
if (autocorrect)
reader = new VCFReader(new File(in_filename),new VCFHomogenizer());
else
reader = new VCFReader(new File(in_filename));
VCFHeader header = reader.getHeader();
@ -258,7 +264,7 @@ class CheckRefFields extends CommandLineProgram
VCFReader reader = null;
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
else { reader = new VCFReader(new File(filename)); }
ReferenceSequenceFileWalker ref = new ReferenceSequenceFileWalker(new File(fasta_filename));
@ -274,7 +280,7 @@ class CheckRefFields extends CommandLineProgram
{
VCFRecord record = reader.next();
String chr = record.getLocation().getContig();
String chr = record.getChr();
if (! chr.equals(ref_seq_name))
{
System.out.println("Loading " + chr);
@ -282,7 +288,7 @@ class CheckRefFields extends CommandLineProgram
ref_seq_name = chr;
}
long offset = record.getLocation().getStart();
long offset = record.getStart();
char vcf_ref_base = record.getReference().charAt(0);
char fasta_ref_base = (char)ref_seq[(int)offset-1];
@ -319,7 +325,7 @@ class FixRefFields extends CommandLineProgram
VCFReader reader = null;
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); }
if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); }
else { reader = new VCFReader(new File(filename)); }
ReferenceSequenceFileWalker ref = new ReferenceSequenceFileWalker(new File(fasta_filename));
@ -349,7 +355,7 @@ class FixRefFields extends CommandLineProgram
{
VCFRecord record = reader.next();
String chr = record.getLocation().getContig();
String chr = record.getChr();
if (! chr.equals(ref_seq_name))
{
System.out.println("Loading " + chr);
@ -357,7 +363,7 @@ class FixRefFields extends CommandLineProgram
ref_seq_name = chr;
}
long offset = record.getLocation().getStart();
long offset = record.getStart();
char vcf_ref_base = record.getReference().charAt(0);
char fasta_ref_base = (char)ref_seq[(int)offset-1];
@ -498,7 +504,7 @@ class VCFGrep_old extends CommandLineProgram
throw new RuntimeException(e);
}
if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); }
if (autocorrect) { reader = new VCFReader(new File(in_filename),new VCFHomogenizer()); }
else { reader = new VCFReader(new File(in_filename)); }
@ -528,7 +534,7 @@ class PrintGQ extends CommandLineProgram
VCFReader reader;
VCFReader reader2;
reader = new VCFReader(VCFHomogenizer.create(filename));
reader = new VCFReader(new File(filename),new VCFHomogenizer());
VCFHeader header = reader.getHeader();
VCFRecord record = reader.next();
@ -604,7 +610,7 @@ class VCFSimpleStats extends CommandLineProgram
if (autocorrect)
{
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
}
else
{
@ -819,8 +825,8 @@ class VCFConcordance extends CommandLineProgram
if (autocorrect)
{
reader1 = new VCFReader(VCFHomogenizer.create(filename1));
reader2 = new VCFReader(VCFHomogenizer.create(filename2));
reader1 = new VCFReader(new File(filename1),new VCFHomogenizer());
reader2 = new VCFReader(new File(filename2),new VCFHomogenizer());
}
else
{
@ -1266,8 +1272,8 @@ public class VCFTool
public static Interval getIntervalFromRecord(VCFRecord record)
{
String chr = record.getLocation().getContig();
long off = record.getLocation().getStart();
String chr = record.getChr();
long off = record.getStart();
return new Interval(chr, (int)off, (int)off);
}

View File

@ -27,12 +27,13 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import java.util.*;
@ -79,7 +80,7 @@ public class SampleUtils {
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getType().equals(RodVCF.class) ) {
if ( rod.getType().equals(VCFRecord.class) ) {
VCFReader reader = new VCFReader(rod.getFile());
samples.addAll(reader.getHeader().getGenotypeSamples());
reader.close();
@ -108,7 +109,7 @@ public class SampleUtils {
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getType().equals(RodVCF.class) ) {
if ( rod.getType().equals(VCFCodec.class) ) {
VCFReader reader = new VCFReader(rod.getFile());
Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
for ( String sample : vcfSamples )

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.geli.*;
import org.broadinstitute.sting.utils.genotype.glf.*;

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -10,7 +11,6 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -1,60 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.Utils;
/**
* @author ebanks
* <p/>
* Class VCFFilterHeaderLine
* <p/>
* A class representing a key=value entry for FILTER fields in the VCF header
*/
public class VCFFilterHeaderLine extends VCFHeaderLine {

    // the filter name and its human-readable description
    private String mName;
    private String mDescription;

    /**
     * create a VCF filter header line
     *
     * @param name        the name for this header line
     * @param description the description for this header line
     */
    public VCFFilterHeaderLine(String name, String description) {
        super("FILTER", "");
        mName = name;
        mDescription = description;
    }

    /**
     * create a VCF filter header line from its serialized form: name,"description"
     *
     * @param line the header line (the text after "##FILTER=")
     * @throws IllegalArgumentException if fewer than two comma-separated values are present
     */
    protected VCFFilterHeaderLine(String line) {
        super("FILTER", "");
        String[] pieces = line.split(",");
        if ( pieces.length < 2 )
            throw new IllegalArgumentException("There are too few values in the VCF FILTER header line: " + line);
        mName = pieces[0];
        mDescription = Utils.trim(pieces[1], '"');
        // re-join any commas that were part of the quoted description;
        // bugfix: start at index 2 -- pieces[1] is already in mDescription
        // (previously started at 1, duplicating the first description token)
        for (int i = 2; i < pieces.length; i++)
            mDescription += "," + Utils.trim(pieces[i], '"');
    }

    protected String makeStringRep() {
        return String.format("FILTER=%s,\"%s\"", mName, mDescription);
    }

    public boolean equals(Object o) {
        if ( !(o instanceof VCFFilterHeaderLine) )
            return false;
        VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
        return mName.equals(other.mName) &&
               mDescription.equals(other.mDescription);
    }

    // added to keep the equals/hashCode contract: equal objects must share a hash code
    public int hashCode() {
        return 31 * mName.hashCode() + mDescription.hashCode();
    }
}

View File

@ -1,80 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.Utils;
/**
* @author ebanks
* <p/>
* Class VCFFormatHeaderLine
* <p/>
* A class representing a key=value entry for genotype FORMAT fields in the VCF header
*/
public class VCFFormatHeaderLine extends VCFHeaderLine {

    // the legal value types for a FORMAT field
    public enum FORMAT_TYPE {
        Integer, Float, String
    }

    // name, per-genotype value count, description, and value type of this FORMAT field
    private String mName;
    private int mCount;
    private String mDescription;
    private FORMAT_TYPE mType;

    /**
     * create a VCF format header line
     *
     * @param name        the name for this header line
     * @param count       the count for this header line
     * @param type        the type for this header line
     * @param description the description for this header line
     */
    public VCFFormatHeaderLine(String name, int count, FORMAT_TYPE type, String description) {
        super("FORMAT", "");
        mName = name;
        mCount = count;
        mType = type;
        mDescription = description;
    }

    /**
     * create a VCF format header line from its serialized form: name,count,type,"description"
     *
     * @param line the header line (the text after "##FORMAT=")
     * @throws IllegalArgumentException if fewer than four comma-separated values are present
     */
    protected VCFFormatHeaderLine(String line) {
        super("FORMAT", "");
        String[] pieces = line.split(",");
        if ( pieces.length < 4 )
            throw new IllegalArgumentException("There are too few values in the VCF FORMAT header line: " + line);
        mName = pieces[0];
        mCount = Integer.valueOf(pieces[1]);
        mType = FORMAT_TYPE.valueOf(pieces[2]);
        mDescription = Utils.trim(pieces[3], '"');
        // just in case there were some commas in the quoted description, re-join them
        for (int i = 4; i < pieces.length; i++)
            mDescription += "," + Utils.trim(pieces[i], '"');
    }

    protected String makeStringRep() {
        return String.format("FORMAT=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription);
    }

    public String getName() { return mName; }
    public int getCount() { return mCount; }
    public String getDescription() { return mDescription; }
    public FORMAT_TYPE getType() { return mType; }

    public boolean equals(Object o) {
        if ( !(o instanceof VCFFormatHeaderLine) )
            return false;
        VCFFormatHeaderLine other = (VCFFormatHeaderLine)o;
        return mName.equals(other.mName) &&
               mCount == other.mCount &&
               mDescription.equals(other.mDescription) &&
               mType == other.mType;
    }

    // added to keep the equals/hashCode contract; these objects are stored in HashSets
    // (see VCFGenotypeRecord.getSupportedHeaderStrings), where a missing hashCode breaks lookup
    public int hashCode() {
        int result = mName.hashCode();
        result = 31 * result + mCount;
        result = 31 * result + mDescription.hashCode();
        result = 31 * result + mType.hashCode();
        return result;
    }
}

View File

@ -1,127 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
/**
* @author aaron
* <p/>
* Class VCFGenotypeEncoding
* <p/>
* basic encoding class for genotype fields in VCF
*/
public class VCFGenotypeEncoding {
// the kinds of allele encodings a genotype may carry
public enum TYPE {
SINGLE_BASE,
INSERTION,
DELETION,
UNCALLED,
MIXED // this type is only valid in aggregate, not for a single VCFGenotypeEncoding
}
// our length (0 for SINGLE_BASE), our bases, and our type
private final int mLength;
private final String mBases;
private final TYPE mType;
// public constructor, that parses out the base string
/**
 * Parse a VCF allele encoding: a single base (A/C/G/T), "." for no-call,
 * "D&lt;n&gt;" for an n-base deletion, or "I&lt;bases&gt;" for an insertion.
 *
 * @param baseString the encoded allele string
 * @throws IllegalArgumentException if the string is none of the forms above
 */
public VCFGenotypeEncoding(String baseString) {
if ((baseString.length() == 1)) {
// are we an empty (no-call) genotype?
if (baseString.equals(VCFGenotypeRecord.EMPTY_ALLELE)) {
mBases = VCFGenotypeRecord.EMPTY_ALLELE;
mLength = 0;
mType = TYPE.UNCALLED;
} else if (!validBases(baseString)) {
throw new IllegalArgumentException("Alleles of length 1 must be one of A,C,G,T, " + baseString + " was passed in");
} else { // we're a valid base
mBases = baseString.toUpperCase();
mLength = 0;
mType = TYPE.SINGLE_BASE;
}
} else { // deletion or insertion
if (baseString.length() < 1 || (baseString.toUpperCase().charAt(0) != 'D' && baseString.toUpperCase().charAt(0) != 'I')) {
throw new IllegalArgumentException("Genotype encoding of " + baseString + " was passed in, but is not a valid deletion, insertion, base, or no call (.)");
}
// "D<n>": mLength holds the deleted-base count and mBases stays empty
if (baseString.toUpperCase().charAt(0) == 'D') {
mLength = Integer.valueOf(baseString.substring(1, baseString.length()));
mBases = "";
mType = TYPE.DELETION;
} else { // we're an I
// "I<bases>": the inserted bases are stored upper-cased; mLength is their count
mBases = baseString.substring(1, baseString.length()).toUpperCase();
if (!validBases(mBases))
throw new IllegalArgumentException("The insertion base string contained invalid bases -> " + baseString);
mLength = mBases.length();
mType = TYPE.INSERTION;
}
}
}
// length in bases: 0 for SINGLE_BASE and UNCALLED, deleted count for DELETION, inserted count for INSERTION
public int getLength() {
return mLength;
}
// the bases of this allele ("" for DELETION, "." for UNCALLED)
public String getBases() {
return mBases;
}
public TYPE getType() {
return mType;
}
/**
 * Equality: another VCFGenotypeEncoding with the same type, bases, and length --
 * or, as a special case, any object whose toString() is "." when this encoding is UNCALLED.
 * NOTE(review): that second case makes equals() asymmetric (a String "." never equals
 * this object in the other direction); confirm callers rely on it before changing.
 */
public boolean equals(Object obj) {
if ( obj == null )
return false;
if ( obj instanceof VCFGenotypeEncoding ) {
VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj;
return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength);
}
if ( mType == TYPE.UNCALLED && obj.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) )
return true;
return false;
}
public int hashCode() {
// our underlying data is immutable, so this is safe (we won't strand a value in a hashtable somewhere
// when the data changes underneath, altering this value).
String str = this.mBases + String.valueOf(this.mLength) + this.mType.toString();
return str.hashCode();
}
/**
 * dump the string representation of this genotype encoding
 *
 * @return string representation (inverse of the constructor's parse: base, ".", "I<bases>", or "D<n>")
 */
public String toString() {
StringBuilder builder = new StringBuilder();
switch (mType) {
case SINGLE_BASE:
case UNCALLED:
builder.append(mBases);
break;
case INSERTION:
builder.append("I");
builder.append(mBases);
break;
case DELETION:
builder.append("D");
builder.append(mLength);
break;
}
return builder.toString();
}
/**
 * ensure that string contains valid bases
 *
 * @param bases the bases to check
 *
 * @return true if they're all either A,C,G,T; false otherwise
 */
private static boolean validBases(String bases) {
for (char c : bases.toUpperCase().toCharArray()) {
if (c != 'A' && c != 'C' && c != 'G' && c != 'T')
return false;
}
return true;
}
}

View File

@ -1,353 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.*;
/**
* @author aaron
* <p/>
* Class VCFGenotypeRecord
* <p/>
*/
public class VCFGenotypeRecord {

    // key names for the standard per-sample FORMAT fields
    public static final String GENOTYPE_KEY = "GT";
    public static final String GENOTYPE_QUALITY_KEY = "GQ";
    public static final String DEPTH_KEY = "DP";
    public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
    public static final String GENOTYPE_FILTER_KEY = "FT";
    public static final String GENOTYPE_POSTERIORS_TRIPLET_KEY = "GL";
    public static final String OLD_DEPTH_KEY = "RD";

    // the values for empty fields
    public static final String EMPTY_GENOTYPE = "./.";
    public static final String EMPTY_ALLELE = ".";
    public static final int MISSING_GENOTYPE_QUALITY = -1;
    public static final int MISSING_DEPTH = -1;
    public static final int MISSING_HAPLOTYPE_QUALITY = -1;
    public static final String PASSES_FILTERS = "0";
    public static final String UNFILTERED = ".";
    public static final double MAX_QUAL_VALUE = 99.0;

    // what kind of phasing this genotype has
    public enum PHASE {
        UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN
    }

    // the record this genotype belongs to (null until setVCFRecord is called)
    private VCFRecord mRecord;
    // our phasing
    private PHASE mPhaseType;
    // our base(s)
    private final List<VCFGenotypeEncoding> mGenotypeAlleles = new ArrayList<VCFGenotypeEncoding>();
    // our mapping of the format mFields to values
    private final Map<String, String> mFields = new HashMap<String, String>();
    // our sample name
    private String mSampleName;

    /**
     * Create a VCF genotype record
     *
     * @param sampleName sample name
     * @param genotypes  list of genotypes (null is treated as no alleles)
     * @param phasing    phasing
     */
    public VCFGenotypeRecord(String sampleName, List<VCFGenotypeEncoding> genotypes, PHASE phasing) {
        mSampleName = sampleName;
        if (genotypes != null)
            this.mGenotypeAlleles.addAll(genotypes);
        mPhaseType = phasing;
    }

    public void setVCFRecord(VCFRecord record) {
        mRecord = record;
    }

    public void setSampleName(String name) {
        mSampleName = name;
    }

    /**
     * Adds a field to the genotype record.
     * Throws an exception if the key is GT, as that's computed internally.
     *
     * @param key   the field name (use static variables above for common fields)
     * @param value the field value
     */
    public void setField(String key, String value) {
        // make sure the GT field isn't being set
        if ( key.equals(GENOTYPE_KEY) )
            throw new IllegalArgumentException("Setting the GT field is not allowed as that's done internally");
        mFields.put(key, value);
    }

    /**
     * determine the phase of the genotype
     *
     * @param phase the string that contains the phase character
     * @return the phase
     * @throws IllegalArgumentException if the separator is not "/", "|", or "\"
     */
    static PHASE determinePhase(String phase) {
        // find the phasing information
        if (phase.equals("/"))
            return PHASE.UNPHASED;
        else if (phase.equals("|"))
            return PHASE.PHASED;
        else if (phase.equals("\\"))
            return PHASE.PHASED_SWITCH_PROB;
        else
            throw new IllegalArgumentException("Unknown genotype phasing parameter");
    }

    public PHASE getPhaseType() {
        return mPhaseType;
    }

    public String getSampleName() {
        return mSampleName;
    }

    public List<VCFGenotypeEncoding> getAlleles() {
        return mGenotypeAlleles;
    }

    public Map<String, String> getFields() {
        return mFields;
    }

    /** @return the phred-scaled quality score, or MISSING_GENOTYPE_QUALITY if GQ is absent */
    public double getQual() {
        return ( mFields.containsKey(GENOTYPE_QUALITY_KEY) ? Double.valueOf(mFields.get(GENOTYPE_QUALITY_KEY)) : MISSING_GENOTYPE_QUALITY);
    }

    public boolean isMissingQual() {
        return (int)getQual() == MISSING_GENOTYPE_QUALITY;
    }

    /** @return -log10(P(error)) = phred quality / 10, or MISSING_GENOTYPE_QUALITY when GQ is absent */
    public double getNegLog10PError() {
        double qual = getQual();
        return (qual == MISSING_GENOTYPE_QUALITY ? MISSING_GENOTYPE_QUALITY : qual / 10.0);
    }

    /** @return the read depth (DP field), or MISSING_DEPTH if absent */
    public int getReadCount() {
        return ( mFields.containsKey(DEPTH_KEY) ? Integer.valueOf(mFields.get(DEPTH_KEY)) : MISSING_DEPTH);
    }

    public GenomeLoc getLocation() {
        return mRecord != null ? mRecord.getLocation() : null;
    }

    /** @return the enclosing record's reference string, or "N" when no record is attached */
    public String getReference() {
        return mRecord != null ? mRecord.getReference() : "N";
    }

    /** @return the concatenated bases of all alleles (uncalled alleles contribute nothing) */
    public String getBases() {
        String genotype = "";
        for ( VCFGenotypeEncoding encoding : mGenotypeAlleles )
            genotype += encoding.getBases();
        return genotype;
    }

    /**
     * @param ref the reference base
     * @return true if any called allele is an indel or differs from the reference base
     */
    public boolean isVariant(char ref) {
        for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
            if ( encoding.getType() == VCFGenotypeEncoding.TYPE.UNCALLED )
                continue;
            if ( encoding.getType() != VCFGenotypeEncoding.TYPE.SINGLE_BASE ||
                 encoding.getBases().charAt(0) != ref )
                return true;
        }
        return false;
    }

    /** @return true unless the attached record is an indel (true when no record is attached) */
    public boolean isPointGenotype() {
        return (mRecord != null ? !mRecord.isIndel() : true);
    }

    /** @return true if all alleles carry identical bases (trivially true for zero alleles) */
    public boolean isHom() {
        if ( mGenotypeAlleles.size() == 0 )
            return true;
        String bases = mGenotypeAlleles.get(0).getBases();
        for ( int i = 1; i < mGenotypeAlleles.size(); i++ ) {
            // bugfix: compare allele i against the first allele
            // (previously compared get(1) on every iteration, ignoring alleles past the second)
            if ( !bases.equals(mGenotypeAlleles.get(i).getBases()) )
                return false;
        }
        return true;
    }

    public boolean isHet() {
        return !isHom();
    }

    /** @return true if every allele is uncalled */
    public boolean isNoCall() {
        for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
            if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
                return false;
        }
        return true;
    }

    /** @return true if the FT field is present and is neither "." (unfiltered) nor "0" (passing) */
    public boolean isFiltered() {
        return ( mFields.get(GENOTYPE_FILTER_KEY) != null &&
                 !mFields.get(GENOTYPE_FILTER_KEY).equals(UNFILTERED) &&
                 !mFields.get(GENOTYPE_FILTER_KEY).equals(PASSES_FILTERS));
    }

    // NOTE(review): ploidy is hard-coded to diploid
    public int getPloidy() {
        return 2;
    }

    public VCFRecord getRecord() {
        return mRecord;
    }

    /**
     * render the GT value: allele indices (0 = ref, i+1 = the i-th alt allele)
     * joined by the phase separator
     *
     * @param altAlleles the alternate alleles, used to map each allele to its index
     * @return the GT string, e.g. "0/1"
     */
    private String toGenotypeString(List<VCFGenotypeEncoding> altAlleles) {
        String str = "";
        boolean first = true;
        for (VCFGenotypeEncoding allele : mGenotypeAlleles) {
            if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED)
                str += VCFGenotypeRecord.EMPTY_ALLELE;
            else
                str += String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0);
            if (first) {
                // the separator is emitted once, directly after the first allele
                switch (mPhaseType) {
                    case UNPHASED:
                        str += "/";
                        break;
                    case PHASED:
                        str += "|";
                        break;
                    case PHASED_SWITCH_PROB:
                        str += "\\";
                        break;
                    case UNKNOWN:
                        throw new UnsupportedOperationException("Unknown phase type");
                }
                first = false;
            }
        }
        return str;
    }

    @Override
    public String toString() {
        return String.format("[VCFGenotype %s %s %s %s]", getLocation(), mSampleName, this.mGenotypeAlleles, mFields);
    }

    /** @return true if every allele is uncalled (same check as isNoCall) */
    public boolean isEmptyGenotype() {
        for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
            if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
                return false;
        }
        return true;
    }

    public boolean equals(Object other) {
        if (other instanceof VCFGenotypeRecord) {
            if (((VCFGenotypeRecord) other).mPhaseType != this.mPhaseType) return false;
            if (!((VCFGenotypeRecord) other).mGenotypeAlleles.equals(this.mGenotypeAlleles)) return false;
            if (!((VCFGenotypeRecord) other).mFields.equals(mFields)) return false;
            if (!((VCFGenotypeRecord) other).mSampleName.equals(this.mSampleName)) return false;
            return true;
        }
        return false;
    }

    // added to keep the equals/hashCode contract: hash exactly the state equals() compares
    @Override
    public int hashCode() {
        int result = (mPhaseType != null ? mPhaseType.hashCode() : 0);
        result = 31 * result + mGenotypeAlleles.hashCode();
        result = 31 * result + mFields.hashCode();
        result = 31 * result + (mSampleName != null ? mSampleName.hashCode() : 0);
        return result;
    }

    /**
     * output a string representation of the VCFGenotypeRecord, given the alternate alleles
     *
     * @param altAlleles            the alternate alleles, needed for toGenotypeString()
     * @param genotypeFormatStrings genotype format strings
     * @return a string
     */
    public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) {
        StringBuilder builder = new StringBuilder();
        builder.append(toGenotypeString(altAlleles));
        for ( String field : genotypeFormatStrings ) {
            if ( field.equals(GENOTYPE_KEY) )
                continue;
            String value = mFields.get(field);
            // legacy records may carry depth under RD; fall back to the DP value
            if ( value == null && field.equals(OLD_DEPTH_KEY) )
                value = mFields.get(DEPTH_KEY);
            builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
            if ( value == null || value.equals("") )
                builder.append(getMissingFieldValue(field));
            else
                builder.append(value);
        }
        return builder.toString();
    }

    /**
     * output a string representation of an empty genotype
     *
     * @param genotypeFormatStrings genotype format strings
     * @return a string
     */
    public static String stringEncodingForEmptyGenotype(String[] genotypeFormatStrings) {
        StringBuilder builder = new StringBuilder();
        builder.append(EMPTY_GENOTYPE);
        for ( String field : genotypeFormatStrings ) {
            if ( field.equals(GENOTYPE_KEY) )
                continue;
            builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR);
            builder.append(getMissingFieldValue(field));
        }
        return builder.toString();
    }

    /** @return the placeholder value to emit when the given FORMAT field has no value ("" if unknown) */
    public static String getMissingFieldValue(String field) {
        String result = "";
        if ( field.equals(GENOTYPE_QUALITY_KEY) )
            result = String.valueOf(MISSING_GENOTYPE_QUALITY);
        else if ( field.equals(DEPTH_KEY) || field.equals(OLD_DEPTH_KEY) )
            result = String.valueOf(MISSING_DEPTH);
        else if ( field.equals(GENOTYPE_FILTER_KEY) )
            result = UNFILTERED;
        else if ( field.equals(GENOTYPE_POSTERIORS_TRIPLET_KEY) )
            result = "0,0,0";
        // TODO -- support haplotype quality
        //else if ( field.equals(HAPLOTYPE_QUALITY_KEY) )
        //    result = String.valueOf(MISSING_HAPLOTYPE_QUALITY);
        return result;
    }

    /** @return FORMAT header lines for the fields this class knows how to write */
    public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
        Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
        result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.String, "Genotype"));
        result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.Float, "Genotype Quality"));
        result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.Integer, "Read Depth (only filtered reads used for calling)"));
        result.add(new VCFFormatHeaderLine(GENOTYPE_POSTERIORS_TRIPLET_KEY, 3, VCFFormatHeaderLine.FORMAT_TYPE.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
        //result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality"));
        return result;
    }

    /** replace all FORMAT fields with the given map's entries */
    public void replaceFields(HashMap<String,String> newFields) {
        mFields.clear();
        for ( String s : newFields.keySet() ) {
            mFields.put(s,newFields.get(s));
        }
    }
}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.util.Set;
@ -24,7 +26,7 @@ public interface VCFGenotypeWriter extends GenotypeWriter {
* Add a given VCF record to the given output.
* @param vcfRecord Record to add.
*/
public void addRecord(VCFRecord vcfRecord);
public void addRecord(VCFRecord vcfRecord);
/**
* set the validation stringency

View File

@ -1,5 +1,9 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.VCFFormatHeaderLine;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
@ -64,7 +68,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
// set up the allowed genotype format fields
allowedGenotypeFormatStrings = new ArrayList<String>();
for ( VCFHeaderLine field : headerInfo ) {
if ( field instanceof VCFFormatHeaderLine )
if ( field instanceof VCFFormatHeaderLine)
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName());
}
}

View File

@ -1,151 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import java.util.*;
/**
* @author aaron
* <p/>
* Class VCFHeader
* <p/>
* A class representing the VCF header
*/
public class VCFHeader {

    public static final String FILE_FORMAT_KEY = "fileformat";
    public static final String OLD_FILE_FORMAT_KEY = "format"; // from version 3.2

    /** the current vcf version we support. */
    public static final String VCF_VERSION_HEADER = "VCFv";
    public static final String OLD_VCF_VERSION_HEADER = "VCRv"; // from version 3.2
    public static final double VCF_VERSION_NUMBER = 3.3;
    public static final String VCF_VERSION = VCF_VERSION_HEADER + VCF_VERSION_NUMBER;

    // the mandatory header fields, declared in their required column order
    public enum HEADER_FIELDS {
        CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
    }

    // the associated meta data, kept in sorted order
    private final Set<VCFHeaderLine> mMetaData;

    // the sample (genotype column) names, in input order
    private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();

    // the character string that indicates meta data
    public static final String METADATA_INDICATOR = "##";
    // the header string indicator
    public static final String HEADER_INDICATOR = "#";

    /** do we have genotyping data? */
    private boolean hasGenotypingData = false;

    /**
     * create a VCF header, given a set of meta data lines
     *
     * @param metaData the meta data associated with this header
     */
    public VCFHeader(Set<VCFHeaderLine> metaData) {
        mMetaData = new TreeSet<VCFHeaderLine>(metaData);
        checkVCFVersion();
    }

    /**
     * create a VCF header, given meta data lines and the sample column names
     *
     * @param metaData            the meta data associated with this header
     * @param genotypeSampleNames the sample column names (a "FORMAT" entry, if present, is dropped)
     */
    public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
        mMetaData = new TreeSet<VCFHeaderLine>(metaData);
        for (String column : genotypeSampleNames) {
            if (!"FORMAT".equals(column))
                mGenotypeSampleNames.add(column);
        }
        // note: keyed off the input set (which may include "FORMAT"), matching historical behavior
        hasGenotypingData = !genotypeSampleNames.isEmpty();
        checkVCFVersion();
    }

    /**
     * check our metadata for a VCF version tag: stamp the current version if none is
     * declared, and throw if the declared version is newer than we support
     */
    public void checkVCFVersion() {
        String version = null;
        for (VCFHeaderLine line : mMetaData) {
            String key = line.getKey();
            if (FILE_FORMAT_KEY.equals(key) || OLD_FILE_FORMAT_KEY.equals(key)) {
                version = line.getValue();
                break;
            }
        }
        if (version == null) {
            // no version declared; record the version we write
            mMetaData.add(new VCFHeaderLine(FILE_FORMAT_KEY, VCF_VERSION));
        } else if (!isSupportedVersion(version)) {
            throw new RuntimeException("VCF version " + version +
                    " is not yet supported; only version " + VCF_VERSION + " and earlier can be used");
        }
    }

    /** @return true when the version has a known prefix and a numeric part &lt;= VCF_VERSION_NUMBER */
    private boolean isSupportedVersion(String version) {
        boolean knownPrefix = version.startsWith(VCF_VERSION_HEADER) || version.startsWith(OLD_VCF_VERSION_HEADER);
        if (!knownPrefix)
            return false;
        try {
            // both prefixes have the same length, so one substring offset serves either
            double declared = Double.valueOf(version.substring(VCF_VERSION_HEADER.length()));
            return declared <= VCF_VERSION_NUMBER;
        } catch (Exception ignored) {
            // non-numeric remainder -> not a supported version
        }
        return false;
    }

    /**
     * get the header fields in order they're presented in the input file (which is now required to be
     * the order presented in the spec).
     *
     * @return a set of the header fields, in order
     */
    public Set<HEADER_FIELDS> getHeaderFields() {
        // EnumSet iterates in declaration order, i.e. the required column order
        return EnumSet.allOf(HEADER_FIELDS.class);
    }

    /**
     * get the meta data, associated with this header
     *
     * @return a set of the meta data
     */
    public Set<VCFHeaderLine> getMetaData() {
        return mMetaData;
    }

    /**
     * get the genotyping sample names
     *
     * @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false
     */
    public Set<String> getGenotypeSamples() {
        return mGenotypeSampleNames;
    }

    /**
     * do we have genotyping data?
     *
     * @return true if we have genotyping columns, false otherwise
     */
    public boolean hasGenotypingData() {
        return hasGenotypingData;
    }

    /** @return the column count: the mandatory fields, plus FORMAT and one column per sample */
    public int getColumnCount() {
        int genotypeColumns = hasGenotypingData ? mGenotypeSampleNames.size() + 1 : 0;
        return HEADER_FIELDS.values().length + genotypeColumns;
    }
}

View File

@ -1,86 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
/**
* @author ebanks
* <p/>
* Class VCFHeaderLine
* <p/>
* A class representing a key=value entry in the VCF header
*/
public class VCFHeaderLine implements Comparable {
private String stringRep = null;
private String mKey = null;
private String mValue = null;
/**
* create a VCF header line
*
* @param key the key for this header line
* @param value the value for this header line
*/
public VCFHeaderLine(String key, String value) {
mKey = key;
mValue = value;
}
/**
* Get the key
*
* @return the key
*/
public String getKey() {
return mKey;
}
/**
* Set the key
*
* @param key the key for this header line
*/
public void setKey(String key) {
mKey = key;
stringRep = null;
}
/**
* Get the value
*
* @return the value
*/
public String getValue() {
return mValue;
}
/**
* Set the value
*
* @param value the value for this header line
*/
public void setValue(String value) {
mValue = value;
stringRep = null;
}
public String toString() {
if ( stringRep == null )
stringRep = makeStringRep();
return stringRep;
}
protected String makeStringRep() {
return mKey + "=" + mValue;
}
public boolean equals(Object o) {
if ( !(o instanceof VCFHeaderLine) )
return false;
return mKey.equals(((VCFHeaderLine)o).getKey()) && mValue.equals(((VCFHeaderLine)o).getValue());
}
public int compareTo(Object other) {
return toString().compareTo(other.toString());
}
}

View File

@ -1,75 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.Utils;
/**
* @author ebanks
* <p/>
* Class VCFInfoHeaderLine
* <p/>
* A class representing a key=value entry for INFO fields in the VCF header
*/
public class VCFInfoHeaderLine extends VCFHeaderLine {

    // the legal value types for an INFO field
    public enum INFO_TYPE {
        Integer, Float, String, Character, Flag
    }

    // name, per-record value count, description, and value type of this INFO field
    private String mName;
    private int mCount;
    private String mDescription;
    private INFO_TYPE mType;

    /**
     * create a VCF info header line
     *
     * @param name        the name for this header line
     * @param count       the count for this header line
     * @param type        the type for this header line
     * @param description the description for this header line
     */
    public VCFInfoHeaderLine(String name, int count, INFO_TYPE type, String description) {
        super("INFO", "");
        mName = name;
        mCount = count;
        mType = type;
        mDescription = description;
    }

    /**
     * create a VCF info header line from its serialized form: name,count,type,"description"
     *
     * @param line the header line (the text after "##INFO=")
     * @throws IllegalArgumentException if fewer than four comma-separated values are present
     */
    protected VCFInfoHeaderLine(String line) {
        super("INFO", "");
        String[] pieces = line.split(",");
        if ( pieces.length < 4 )
            throw new IllegalArgumentException("There are too few values in the VCF INFO header line: " + line);
        mName = pieces[0];
        mCount = Integer.valueOf(pieces[1]);
        mType = INFO_TYPE.valueOf(pieces[2]);
        mDescription = Utils.trim(pieces[3], '"');
        // just in case there were some commas in the quoted description, re-join them
        for (int i = 4; i < pieces.length; i++)
            mDescription += "," + Utils.trim(pieces[i], '"');
    }

    protected String makeStringRep() {
        return String.format("INFO=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription);
    }

    // accessors added for parity with VCFFormatHeaderLine
    public String getName() { return mName; }
    public int getCount() { return mCount; }
    public String getDescription() { return mDescription; }
    public INFO_TYPE getType() { return mType; }

    public boolean equals(Object o) {
        if ( !(o instanceof VCFInfoHeaderLine) )
            return false;
        VCFInfoHeaderLine other = (VCFInfoHeaderLine)o;
        return mName.equals(other.mName) &&
               mCount == other.mCount &&
               mDescription.equals(other.mDescription) &&
               mType == other.mType;
    }

    // added to keep the equals/hashCode contract
    public int hashCode() {
        int result = mName.hashCode();
        result = 31 * result + mCount;
        result = 31 * result + mDescription.hashCode();
        result = 31 * result + mType.hashCode();
        return result;
    }
}

View File

@ -1,5 +1,8 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.VCFGenotypeEncoding;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;

View File

@ -20,6 +20,9 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
/** The VCFReader class, which given a valid vcf file, parses out the header and VCF records */
@ -28,14 +31,11 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
// our VCF header
private VCFHeader mHeader;
// our buffered input stream
private BufferedReader mReader;
// our iterator
private Iterator<VCFRecord> iterator;
// our next record
private VCFRecord mNextRecord = null;
// our pattern matching for the genotype mFields
private static final Pattern gtPattern = Pattern.compile("([0-9\\.]+)([\\\\|\\/])([0-9\\.]*)");
// our feature reader; so we can close it
private FeatureReader<VCFRecord> vcfReader = null;
/**
* Create a VCF reader, given a VCF file
@ -43,89 +43,36 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @param vcfFile the vcf file to write
*/
public VCFReader(File vcfFile) {
if (vcfFile.getName().endsWith(".gz"))
openGZipFile(vcfFile);
else
openTextVersion(vcfFile);
this.parseHeader();
initialize(vcfFile, null);
}
/**
* Create a VCF reader, given a stream.
* Create a VCF reader, given a VCF file
*
* @param stream the stream file read
* @param vcfFile the vcf file to write
*/
public VCFReader (InputStream stream)
{
mReader = new BufferedReader(
new InputStreamReader(
stream,
Charset.forName("UTF-8")));
this.parseHeader();
}
/**
* parse the header from the top of the file. We read lines that match
* the header pattern, and try to create a header. We then create the first
* record from the first non-header line in the file.
*/
private void parseHeader() {
String line = null;
// try and parse the header
try {
ArrayList<String> lines = new ArrayList<String>();
line = mReader.readLine();
while (line != null && line.startsWith("#")) {
lines.add(line);
line = mReader.readLine();
}
// try to make a header from the lines we parsed
mHeader = this.createHeader(lines);
// if the last line we parsed is null, we shouldn't try to make a record of it
if (line != null) mNextRecord = createRecord(line, mHeader);
} catch (IOException e) {
throw new RuntimeException("VCFReader: Failed to parse VCF File on line: " + line, e);
}
public VCFReader(File vcfFile, VCFCodec.LineTransform transform) {
initialize(vcfFile, transform);
}
/**
* open a g-zipped version of the VCF format
*
* @param vcfGZipFile the file to open
*/
private void openGZipFile(File vcfGZipFile) {
private void initialize(File vcfFile, VCFCodec.LineTransform transform) {
VCFCodec codec = new VCFCodec();
if (transform != null) codec.setTransformer(transform);
try {
mReader = new BufferedReader(
new InputStreamReader(new GZIPInputStream(
new FileInputStream(vcfGZipFile))));
vcfReader = new FeatureReader(vcfFile,codec);
iterator= vcfReader.iterator();
} catch (FileNotFoundException e) {
throw new RuntimeException("VCFReader: Unable to find VCF file: " + vcfGZipFile, e);
throw new StingException("Unable to read VCF File from " + vcfFile, e);
} catch (IOException e) {
throw new RuntimeException("VCFReader: A problem occured trying to open the file using the gzipped decompressor, filename: " + vcfGZipFile, e);
throw new StingException("Unable to read VCF File from " + vcfFile, e);
}
mHeader = codec.getHeader();
}
/**
* open the vcf file as a text file
*
* @param vcfFile the vcf file name
*/
private void openTextVersion(File vcfFile) {
try {
mReader = new BufferedReader(
new InputStreamReader(
new FileInputStream(vcfFile),
Charset.forName("UTF-8")));
} catch (FileNotFoundException e) {
throw new RuntimeException("VCFReader: Unable to find VCF text file: " + vcfFile, e);
}
}
/** @return true if we have another VCF record to return */
public boolean hasNext() {
return (mNextRecord != null);
return (iterator.hasNext());
}
/**
@ -134,15 +81,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @return a VCFRecord, representing the next record in the file
*/
public VCFRecord next() {
VCFRecord rec = mNextRecord;
try {
String line = mReader.readLine();
if (line == null) mNextRecord = null;
else mNextRecord = createRecord(line, mHeader);
} catch (IOException e) {
mNextRecord = null;
}
return rec;
return iterator.next();
}
/** Remove is not supported */
@ -150,196 +89,6 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
throw new UnsupportedOperationException("Unsupported operation");
}
/**
* create a VCF header, given an array of strings that all start with at least the # character. This function is
* package protected so that the VCFReader can access this function
*
* @param headerStrings a list of header strings
*
* @return a VCF Header created from the list of stinrgs
*/
protected VCFHeader createHeader(List<String> headerStrings) {
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
Set<String> auxTags = new LinkedHashSet<String>();
// iterate over all the passed in strings
for ( String str : headerStrings ) {
if ( !str.startsWith("##") ) {
String[] strings = str.substring(1).split("\t");
// the columns should be in order according to Richard Durbin
int arrayIndex = 0;
for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
try {
if (field != VCFHeader.HEADER_FIELDS.valueOf(strings[arrayIndex]))
throw new RuntimeException("VCFReader: we were expecting column name " + field + " but we saw " + strings[arrayIndex]);
} catch (IllegalArgumentException e) {
throw new RuntimeException("VCFReader: Unknown column name \"" + strings[arrayIndex] + "\", it does not match a known column header name.");
}
arrayIndex++;
}
while (arrayIndex < strings.length) {
if (!strings[arrayIndex].equals("FORMAT"))
auxTags.add(strings[arrayIndex]);
arrayIndex++;
}
} else {
if ( str.startsWith("##INFO=") )
metaData.add(new VCFInfoHeaderLine(str.substring(7)));
else if ( str.startsWith("##FILTER=") )
metaData.add(new VCFFilterHeaderLine(str.substring(9)));
else if ( str.startsWith("##FORMAT=") )
metaData.add(new VCFFormatHeaderLine(str.substring(9)));
else {
int equals = str.indexOf("=");
if ( equals != -1 )
metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));
}
}
}
return new VCFHeader(metaData, auxTags);
}
/**
* create the next VCFRecord, given the input line
*
* @param line the line from the file
* @param mHeader the VCF header
* @return the VCFRecord
*/
/**
 * create the next VCFRecord, given the input line
 *
 * Parsing walks the tab-split tokens with a single cursor: first the fixed
 * header columns (CHROM..INFO), then -- if the header declared samples --
 * the FORMAT column followed by one token per sample.
 *
 * @param line the line from the file
 * @param mHeader the VCF header
 * @return the VCFRecord
 * @throws VCFParseException wrapping any failure, with the offending line in the message
 */
public static VCFRecord createRecord(String line, VCFHeader mHeader) {
    try {
        // things we need to make a VCF record
        Map<VCFHeader.HEADER_FIELDS, String> values = new HashMap<VCFHeader.HEADER_FIELDS, String>();
        String tokens[] = line.split("\t");
        // check to ensure that the column count of tokens is right
        // (getColumnCount() presumably includes FORMAT + sample columns when
        //  genotyping data is present -- TODO confirm against VCFHeader)
        if (tokens.length != mHeader.getColumnCount()) {
            throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);
        }
        // fill the fixed header columns in header order; 'index' is the shared
        // cursor into tokens[] and keeps advancing into the genotype columns below
        int index = 0;
        for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields())
            values.put(field, tokens[index++]);
        // if we have genotyping data, we try and extract the genotype fields
        if (mHeader.hasGenotypingData()) {
            // tokens[index] is the FORMAT column, e.g. "GT:GQ:DP"
            String mFormatString = tokens[index];
            String keyStrings[] = mFormatString.split(":");
            List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
            index++;
            String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
            // one token per declared sample, in header order
            // NOTE(review): charAt(0) assumes a single-base REF; multi-base REF
            // fields would be silently truncated here -- confirm upstream validation
            for (String str : mHeader.getGenotypeSamples()) {
                genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
                index++;
            }
            VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);
            // associate the genotypes with this new record
            for (VCFGenotypeRecord gr : genotypeRecords)
                gr.setVCFRecord(vrec);
            return vrec;
        }
        // sites-only record: no FORMAT / sample columns
        return new VCFRecord(values);
    } catch (Exception e) {
        throw new VCFParseException("VCF Reader failed to parsing, on line = " + line, e);
    }
}
/**
* generate a VCF genotype record, given it's format string, the genotype string, and allele info
*
* @param sampleName the sample name
* @param formatString the format string for this record, which contains the keys for the genotype parameters
* @param genotypeString contains the phasing information, allele information, and values for genotype parameters
* @param altAlleles the alternate allele string array, which we index into based on the field parameters
* @param referenceBase the reference base
*
* @return a VCFGenotypeRecord
*/
/**
 * generate a VCF genotype record from an unsplit FORMAT string.
 *
 * Convenience overload: splits the FORMAT string on ':' and forwards to the
 * array-based variant, which does the real parsing work.
 *
 * @param sampleName the sample name
 * @param formatString the format string for this record, which contains the keys for the genotype parameters
 * @param genotypeString contains the phasing information, allele information, and values for genotype parameters
 * @param altAlleles the alternate allele string array, which we index into based on the field parameters
 * @param referenceBase the reference base
 * @return a VCFGenotypeRecord
 */
public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) {
    final String[] formatKeys = formatString.split(":");
    return getVCFGenotype(sampleName, formatKeys, genotypeString, altAlleles, referenceBase);
}
/**
* generate a VCF genotype record, given it's format string, the genotype string, and allele info
*
* @param sampleName the sample name
* @param keyStrings the split format string for this record, which contains the keys for the genotype parameters
* @param genotypeString contains the phasing information, allele information, and values for genotype parameters
* @param altAlleles the alternate allele string array, which we index into based on the field parameters
* @param referenceBase the reference base
*
* @return a VCFGenotypeRecord
*/
/**
 * generate a VCF genotype record, given it's format string, the genotype string, and allele info
 *
 * The genotype string is consumed left-to-right: for each FORMAT key one
 * ':'-delimited field is sliced off the front of genotypeString.  The GT key
 * is decoded into phase + allele encodings; every other key becomes a
 * tag->value entry on the record.
 *
 * @param sampleName the sample name
 * @param keyStrings the split format string for this record, which contains the keys for the genotype parameters
 * @param genotypeString contains the phasing information, allele information, and values for genotype parameters
 * @param altAlleles the alternate allele string array, which we index into based on the field parameters
 * @param referenceBase the reference base
 *
 * @return a VCFGenotypeRecord
 */
public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) {
    // parameters to create the VCF genotype record
    HashMap<String, String> tagToValue = new HashMap<String, String>();
    VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN;
    List<VCFGenotypeEncoding> bases = new ArrayList<VCFGenotypeEncoding>();
    for (String key : keyStrings) {
        // 'parse' is the next ':'-delimited field; 'nextDivider' marks where it ends
        String parse;
        int nextDivider;
        if (!genotypeString.contains(":")) {
            // last (or only) field: consume the remainder
            nextDivider = genotypeString.length();
            parse = genotypeString;
        } else {
            // NOTE(review): indexOf() can never exceed length(), so this ternary
            // always takes the indexOf() branch; kept as-is for fidelity
            nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":");
            parse = genotypeString.substring(0, nextDivider);
        }
        if (key.equals(VCFGenotypeRecord.GENOTYPE_KEY)) {
            // GT field, e.g. "0|1": group(1)=first allele, group(2)=phase
            // separator, group(3)=optional second allele (empty for haploid)
            Matcher m = gtPattern.matcher(parse);
            if (!m.matches())
                throw new RuntimeException("VCFReader: Unable to match GT genotype flag to it's expected pattern, the field was: " + parse);
            phase = VCFGenotypeRecord.determinePhase(m.group(2));
            addAllele(m.group(1), altAlleles, referenceBase, bases);
            if (m.group(3).length() > 0) addAllele(m.group(3), altAlleles, referenceBase, bases);
        } else {
            // empty trailing fields are replaced by the key's defined missing value
            if ( parse.length() == 0 )
                parse = VCFGenotypeRecord.getMissingFieldValue(key);
            tagToValue.put(key, parse);
        }
        // advance the cursor past the field and its separator, clamping at the end
        if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1;
        genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length());
    }
    // an uncalled genotype ('.') invalidates all other per-sample fields
    if ( bases.size() > 0 && bases.get(0).equals(VCFGenotypeRecord.EMPTY_ALLELE) )
        tagToValue.clear();
    // catch some common errors, either there are too many field keys or there are two many field values
    else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0))
        throw new RuntimeException("VCFReader: genotype value count doesn't match the key count (expected "
                + keyStrings.length + " but saw " + tagToValue.size() + ")");
    else if ( genotypeString.length() > 0 )
        throw new RuntimeException("VCFReader: genotype string contained additional unprocessed fields: " + genotypeString
                + ". This most likely means that the format string is shorter then the value fields.");
    VCFGenotypeRecord rec = new VCFGenotypeRecord(sampleName, bases, phase);
    for ( Map.Entry<String, String> entry : tagToValue.entrySet() )
        rec.setField(entry.getKey(), entry.getValue());
    return rec;
}
/**
* add an alternate allele to the list of alleles we have for a VCF genotype record
*
* @param alleleNumber the allele number, as a string
* @param altAlleles the list of alternate alleles
* @param referenceBase the reference base
* @param bases the list of bases for this genotype call
*/
/**
 * add an alternate allele to the list of alleles we have for a VCF genotype record
 *
 * The allele number follows the VCF GT encoding: "." means uncalled, 0 means
 * the reference base, and 1..altAlleles.length index (1-based) into the ALT list.
 *
 * @param alleleNumber the allele number, as a string
 * @param altAlleles the list of alternate alleles
 * @param referenceBase the reference base
 * @param bases the list of bases for this genotype call
 * @throws IllegalArgumentException if the allele number is out of range
 *         (NumberFormatException, a subclass, if it isn't numeric -- unchanged behavior)
 */
private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<VCFGenotypeEncoding> bases) {
    // '.' in the GT field means the allele was not called
    if (alleleNumber.equals(VCFGenotypeRecord.EMPTY_ALLELE)) {
        bases.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE));
        return;
    }
    // parseInt avoids the needless boxing of Integer.valueOf() and throws the
    // same NumberFormatException on malformed input
    final int alleleValue = Integer.parseInt(alleleNumber);
    // check to make sure the allele value is within bounds (0 == reference)
    if (alleleValue < 0 || alleleValue > altAlleles.length)
        throw new IllegalArgumentException("VCFReader: the allele value of " + alleleValue
                + " is out of bounds given the alternate allele list (legal range is 0-" + altAlleles.length + ").");
    if (alleleValue == 0)
        bases.add(new VCFGenotypeEncoding(String.valueOf(referenceBase)));
    else
        bases.add(new VCFGenotypeEncoding(altAlleles[alleleValue - 1]));
}
/** @return get the header associated with this reader */
@ -352,11 +101,12 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
}
// NOTE(review): this span is diff-rendering residue -- the old and new bodies of
// close() are interleaved without +/- markers (an orphaned try { mReader.close();
// alongside the replacement vcfReader path), so the braces do not balance and
// this text is not compilable as shown.  The apparent post-change intent is:
// close vcfReader if non-null, wrap IOException in StingException, null the
// iterator.  Code left byte-identical; recover the real method from the VCS.
public void close() {
try {
mReader.close();
if (vcfReader != null) try {
vcfReader.close();
} catch (IOException e) {
// we don't really care
Utils.warnUser("Unable to close VCF reader file, this is not fatal, but is worth noting");
throw new StingException("Unable to close vcfReader",e);
}
iterator = null;
}

View File

@ -1,658 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.*;
/**
* the basic VCF record type
*/
public class VCFRecord {
    // standard info field keys
    public static final String ANCESTRAL_ALLELE_KEY = "AA";
    public static final String ALLELE_COUNT_KEY = "AC";
    public static final String ALLELE_FREQUENCY_KEY = "AF";
    public static final String ALLELE_NUMBER_KEY = "AN";
    public static final String RMS_BASE_QUALITY_KEY = "BQ";
    public static final String DBSNP_KEY = "DB";
    public static final String DEPTH_KEY = "DP";
    public static final String HAPMAP2_KEY = "H2";
    public static final String HAPMAP3_KEY = "H3";
    public static final String RMS_MAPPING_QUALITY_KEY = "MQ";
    public static final String SAMPLE_NUMBER_KEY = "NS";
    public static final String STRAND_BIAS_KEY = "SB";

    // commonly used strings that are in the standard
    public static final String FORMAT_FIELD_SEPERATOR = ":";
    public static final String GENOTYPE_FIELD_SEPERATOR = ":";
    public static final String FIELD_SEPERATOR = "\t";
    public static final String FILTER_CODE_SEPERATOR = ";";
    public static final String INFO_FIELD_SEPERATOR = ";";

    // default values
    public static final String UNFILTERED = ".";
    public static final String PASSES_FILTERS = "0";
    public static final String EMPTY_INFO_FIELD = ".";
    public static final String EMPTY_ID_FIELD = ".";
    public static final String EMPTY_ALLELE_FIELD = ".";
    public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
    // sentinel qual value meaning "quality unknown"; compared via (int) casts below
    public static final int MISSING_GENOTYPE_QUALITY = -1;

    // the reference base
    private String mReferenceBases;
    // our location
    private GenomeLoc mLoc;
    // our id
    private String mID;
    // the alternate bases
    private final List<VCFGenotypeEncoding> mAlts = new ArrayList<VCFGenotypeEncoding>();
    // our qual value
    private double mQual;
    // our filter string
    private String mFilterString;
    // our info fields -- use a TreeMap to ensure they can be pulled out in order (so it passes integration tests)
    private final Map<String, String> mInfoFields = new TreeMap<String, String>();
    private String mGenotypeFormatString;
    // our genotype sample fields
    private final List<VCFGenotypeRecord> mGenotypeRecords = new ArrayList<VCFGenotypeRecord>();

    /**
     * given a reference base, a location, and the format string, create a VCF record.
     *
     * @param referenceBases the reference bases to use
     * @param location our genomic location
     * @param genotypeFormatString the format string
     */
    public VCFRecord(String referenceBases, GenomeLoc location, String genotypeFormatString) {
        setReferenceBase(referenceBases);
        setLocation(location);
        mGenotypeFormatString = genotypeFormatString;
    }

    /**
     * given the values for each of the columns, create a VCF record.
     *
     * @param columnValues a mapping of header strings to values
     * @param genotypeFormatString the format string for the genotype records
     * @param genotypeRecords the genotype records
     */
    public VCFRecord(Map<VCFHeader.HEADER_FIELDS, String> columnValues, String genotypeFormatString, List<VCFGenotypeRecord> genotypeRecords) {
        extractFields(columnValues);
        mGenotypeRecords.addAll(genotypeRecords);
        mGenotypeFormatString = genotypeFormatString;
    }

    /**
     * given the values for each of the columns, create a VCF record.
     * (sites-only variant: no genotype format string, no samples)
     *
     * @param columnValues a mapping of header strings to values
     */
    public VCFRecord(Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
        extractFields(columnValues);
        mGenotypeFormatString = "";
    }

    /**
     * create a VCF record
     *
     * @param referenceBases the reference bases to use
     * @param contig the contig this variant is on
     * @param position our position
     * @param ID our ID string
     * @param altBases the list of alternate bases
     * @param qual the qual field
     * @param filters the filters used on this variant
     * @param infoFields the information fields
     * @param genotypeFormatString the format string
     * @param genotypeObjects the genotype objects
     */
    public VCFRecord(String referenceBases,
                     String contig,
                     long position,
                     String ID,
                     List<VCFGenotypeEncoding> altBases,
                     double qual,
                     String filters,
                     Map<String, String> infoFields,
                     String genotypeFormatString,
                     List<VCFGenotypeRecord> genotypeObjects) {
        setReferenceBase(referenceBases);
        setLocation(contig, position);
        this.mID = ID;
        for (VCFGenotypeEncoding alt : altBases)
            this.addAlternateBase(alt);
        this.setQual(qual);
        this.setFilterString(filters);
        this.mInfoFields.putAll(infoFields);
        this.mGenotypeFormatString = genotypeFormatString;
        this.mGenotypeRecords.addAll(genotypeObjects);
    }

    /**
     * extract the field values from the passed in array
     *
     * @param columnValues a map of the header fields to values
     */
    private void extractFields(Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
        String chrom = null;
        long position = -1;
        for (VCFHeader.HEADER_FIELDS val : columnValues.keySet()) {
            switch (val) {
                case CHROM:
                    chrom = columnValues.get(val);
                    break;
                case POS:
                    position = Integer.valueOf(columnValues.get(val));
                    break;
                case ID:
                    setID(columnValues.get(val));
                    break;
                case REF:
                    // NOTE(review): this rejects multi-base REF fields, so indel-style
                    // records cannot be parsed through this path -- confirm intended
                    if (columnValues.get(val).length() != 1)
                        throw new IllegalArgumentException("Reference base should be a single character");
                    setReferenceBase(columnValues.get(val));
                    break;
                case ALT:
                    // comma-separated list of alternate allele encodings
                    String values[] = columnValues.get(val).split(",");
                    for (String alt : values)
                        addAlternateBase(new VCFGenotypeEncoding(alt));
                    break;
                case QUAL:
                    setQual(Double.valueOf(columnValues.get(val)));
                    break;
                case FILTER:
                    setFilterString(columnValues.get(val));
                    break;
                case INFO:
                    // semicolon-separated key=value pairs; bare keys (flags) get ""
                    String vals[] = columnValues.get(val).split(";");
                    for (String alt : vals) {
                        if ( alt.equals(EMPTY_INFO_FIELD) )
                            continue;
                        String keyVal[] = alt.split("=");
                        if ( keyVal.length == 1 )
                            addInfoField(keyVal[0], "");
                        else if (keyVal.length == 2)
                            addInfoField(keyVal[0], keyVal[1]);
                        else
                            throw new IllegalArgumentException("info field key-value pair did not parse into key->value pair: " + alt);
                    }
                    break;
            }
        }
        setLocation(chrom, position);
    }

    /**
     * do we have genotyping data
     *
     * @return true if we have genotyping data, false otherwise
     */
    public boolean hasGenotypeData() {
        return (mGenotypeRecords.size() > 0);
    }

    /**
     * @return this VCF record's location
     */
    public GenomeLoc getLocation() {
        return mLoc;
    }

    /**
     * @return the ID value for this record, or "." if no ID was set
     */
    public String getID() {
        return mID == null ? EMPTY_ID_FIELD : mID;
    }

    /**
     * get the reference base
     *
     * @return either A, T, C, G, or N
     */
    public String getReference() {
        return mReferenceBases;
    }

    /**
     * get the alternate allele strings
     *
     * @return an array of strings representing the alt alleles, or null if there are none
     */
    public List<String> getAlternateAlleleList() {
        ArrayList<String> alts = new ArrayList<String>();
        for ( VCFGenotypeEncoding alt : mAlts )
            alts.add(alt.getBases());
        return alts;
    }

    /** @return the live list of alternate allele encodings (not a defensive copy) */
    public List<VCFGenotypeEncoding> getAlternateAlleles() {
        return mAlts;
    }

    /** @return true if at least one alternate allele was actually called (not '.') */
    public boolean hasAlternateAllele() {
        for ( VCFGenotypeEncoding alt : mAlts ) {
            if ( alt.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
                return true;
        }
        return false;
    }

    /** @return true if there is exactly one alternate allele */
    public boolean isBiallelic() {
        return getAlternateAlleles().size() == 1;
    }

    /** @return true if no alternate allele was called, i.e. a reference site */
    public boolean isReference() {
        return !hasAlternateAllele();
    }

    /** @return all allele strings: the reference first, then the alternates */
    public List<String> getAlleleList() {
        ArrayList<String> list = new ArrayList<String>();
        list.add(getReference());
        list.addAll(getAlternateAlleleList());
        return list;
    }

    /**
     * @return the non-reference allele frequency: the AF info field if present,
     *         otherwise AC/AN (first AC entry only) if both are present, otherwise 0.0
     */
    public double getNonRefAlleleFrequency() {
        if ( mInfoFields.containsKey(ALLELE_FREQUENCY_KEY) ) {
            return Double.valueOf(mInfoFields.get(ALLELE_FREQUENCY_KEY));
        } else {
            // this is the poor man's AF
            if ( mInfoFields.containsKey(ALLELE_COUNT_KEY) && mInfoFields.containsKey(ALLELE_NUMBER_KEY)) {
                String splt[] = mInfoFields.get(ALLELE_COUNT_KEY).split(",");
                if ( splt.length > 0 ) {
                    return (Double.valueOf(splt[0]) / Double.valueOf(mInfoFields.get(ALLELE_NUMBER_KEY)));
                }
            }
        }
        return 0.0;
    }

    /**
     * @return the common type of all alternate alleles, or MIXED if they disagree
     *         NOTE(review): throws IndexOutOfBoundsException when there are no alts
     */
    public VCFGenotypeEncoding.TYPE getType() {
        VCFGenotypeEncoding.TYPE type = mAlts.get(0).getType();
        for (int i = 1; i < mAlts.size(); i++) {
            if ( mAlts.get(i).getType() != type )
                return VCFGenotypeEncoding.TYPE.MIXED; // if we have more than one type, return mixed
        }
        return type;
    }

    /** @return true if all alternate alleles are deletions */
    public boolean isDeletion() {
        return getType() == VCFGenotypeEncoding.TYPE.DELETION;
    }

    /** @return true if all alternate alleles are insertions */
    public boolean isInsertion() {
        return getType() == VCFGenotypeEncoding.TYPE.INSERTION;
    }

    /** @return true if this record is an insertion or a deletion */
    public boolean isIndel() {
        return isDeletion() || isInsertion();
    }

    /** @return true if all alternate alleles are single-base substitutions */
    public boolean isSNP() {
        return getType() == VCFGenotypeEncoding.TYPE.SINGLE_BASE;
    }

    /** @return true if this record is in neither dbSNP nor HapMap */
    public boolean isNovel() {
        return ( ! isInDBSNP() ) && ( ! isInHapmap() );
    }

    /** @return true if the record has a non-"." ID or the DB info flag is "1" */
    public boolean isInDBSNP() {
        return ( ( mID != null && ! mID.equals(".") ) || ( mInfoFields.get(DBSNP_KEY) != null && mInfoFields.get(DBSNP_KEY).equals("1") ) );
    }

    /** @return true if either the H2 or H3 info flag is "1" */
    public boolean isInHapmap() {
        if ( mInfoFields.get(HAPMAP2_KEY) != null && mInfoFields.get(HAPMAP2_KEY).equals("1") ) {
            return true;
        } else {
            return ( mInfoFields.get(HAPMAP3_KEY) != null && mInfoFields.get(HAPMAP3_KEY).equals("1") );
        }
    }

    /**
     * @return the single alternate base of a biallelic SNP
     *         NOTE(review): the guard uses '&&', so it only throws when the record is
     *         BOTH non-SNP and non-biallelic; '||' looks intended -- confirm before relying
     */
    public char getAlternativeBaseForSNP() {
        if ( !isSNP() && !isBiallelic() )
            throw new IllegalStateException("This record does not represent a SNP");
        return mAlts.get(0).getBases().charAt(0);
    }

    /** @return the reference base of a SNP record */
    public char getReferenceForSNP() {
        if ( !isSNP() )
            throw new IllegalStateException("This record does not represent a SNP");
        return getReference().charAt(0);
    }

    /**
     * @return the phred-scaled quality score
     */
    public double getQual() {
        return mQual;
    }

    /** @return true if the qual is the MISSING_GENOTYPE_QUALITY sentinel (-1) */
    public boolean isMissingQual() {
        return (int)mQual == MISSING_GENOTYPE_QUALITY;
    }

    /**
     * @return the -log10PError (qual is phred-scaled, so qual / 10)
     */
    public double getNegLog10PError() {
        return mQual / 10.0;
    }

    /**
     * get the filter criteria
     *
     * @return an array of strings representing the filtering criteria, or UNFILTERED if none are applied
     */
    public String[] getFilteringCodes() {
        if (mFilterString == null) return new String[]{UNFILTERED};
        return mFilterString.split(FILTER_CODE_SEPERATOR);
    }

    /** @return true if the first filter code is neither "." (unfiltered) nor "0" (passing) */
    public boolean isFiltered() {
        String[] codes = getFilteringCodes();
        return !codes[0].equals(UNFILTERED) && !codes[0].equals(PASSES_FILTERS);
    }

    //    public boolean hasFilteringCodes() {
    //        return mFilterString != null;
    //    }

    /** @return the raw filter string, possibly null */
    public String getFilterString() {
        return mFilterString;
    }

    /**
     * get the information key-value pairs as a Map&lt;&gt;
     *
     * @return a map, of the info key-value pairs (the live map, not a copy)
     */
    public final Map<String, String> getInfoValues() {
        return mInfoFields;
    }

    /** @return the live list of per-sample genotype records */
    public List<VCFGenotypeRecord> getVCFGenotypeRecords() {
        return mGenotypeRecords;
    }

    /**
     * @return a List of the sample names
     */
    public String[] getSampleNames() {
        String names[] = new String[mGenotypeRecords.size()];
        for (int i = 0; i < mGenotypeRecords.size(); i++) {
            names[i] = mGenotypeRecords.get(i).getSampleName();
        }
        return names;
    }

    /**
     * linear search for the genotype record of a given sample
     *
     * @param sampleName the sample to look up
     * @return its genotype record, or null if the sample is not present
     */
    public VCFGenotypeRecord getGenotype(final String sampleName) {
        for ( VCFGenotypeRecord rec : getVCFGenotypeRecords() ) {
            if ( rec.getSampleName().equals(sampleName) ) {
                return rec;
            }
        }
        return null;
    }

    /** @return the FORMAT string shared by all genotype records, e.g. "GT:GQ:DP" */
    public String getGenotypeFormatString() {
        return mGenotypeFormatString;
    }// the formatting string for our genotype records

    /** replace the FORMAT string for the genotype records */
    public void setGenotypeFormatString(String newFormatString) {
        mGenotypeFormatString = newFormatString;
    }

    /** set the reference bases; stored uppercased */
    public void setReferenceBase(String reference) {
        mReferenceBases = reference.toUpperCase();
    }

    /**
     * set the location from a contig name and 1-based position
     *
     * @throws IllegalArgumentException if the contig is null or the position negative
     */
    public void setLocation(String chrom, long position) {
        if ( chrom == null )
            throw new IllegalArgumentException("Chromosomes cannot be missing");
        if ( position < 0 )
            throw new IllegalArgumentException("Position values must be greater than 0");
        mLoc = GenomeLocParser.createGenomeLoc(chrom, position);
    }

    /** set the location from an existing GenomeLoc (defensively cloned) */
    public void setLocation(GenomeLoc location) {
        mLoc = location.clone();
    }

    /** set the ID field; null means "no ID" and renders as "." */
    public void setID(String ID) {
        mID = ID;
    }

    /**
     * set the phred-scaled quality
     *
     * @throws IllegalArgumentException for negative quals other than the -1 sentinel
     */
    public void setQual(double qual) {
        if ( qual < 0 && (int)qual != MISSING_GENOTYPE_QUALITY )
            throw new IllegalArgumentException("Qual values cannot be negative unless they are " + MISSING_GENOTYPE_QUALITY + " ('unknown')");
        mQual = qual;
    }

    /** set the raw filter string (semicolon-separated codes, or null) */
    public void setFilterString(String filterString) {
        mFilterString = filterString;
    }

    /** append a genotype record for one sample */
    public void addGenotypeRecord(VCFGenotypeRecord mGenotypeRecord) {
        mGenotypeRecords.add(mGenotypeRecord);
    }

    /** replace all genotype records with the given list */
    public void setGenotypeRecords(List<VCFGenotypeRecord> records) {
        mGenotypeRecords.clear();
        for ( VCFGenotypeRecord g : records )
            addGenotypeRecord(g);
    }

    /**
     * add an alternate base to our alternate base list. All bases are uppercased
     * before being added to the list.  Duplicates are silently ignored.
     *
     * @param base the base to add
     */
    public void addAlternateBase(VCFGenotypeEncoding base) {
        if (!mAlts.contains(base)) mAlts.add(base);
    }

    /** replace all alternate bases with the given list (deduplicated) */
    public void setAlternateBases(List<VCFGenotypeEncoding> bases) {
        mAlts.clear();
        for ( VCFGenotypeEncoding e : bases )
            addAlternateBase(e);
    }

    /**
     * add an info field to the record
     *
     * @param key the key, from the spec or a user created key
     * @param value it's value as a string
     */
    public void addInfoField(String key, String value) {
        //System.out.printf("Adding info field %s=%s%n", key, value);
        mInfoFields.put(key, value);
    }

    /** debugging helper: dump all info fields to stdout */
    public void printInfoFields() {
        for ( Map.Entry<String, String> e : mInfoFields.entrySet() ) {
            System.out.printf("  Current info field %s=%s this=%s%n", e.getKey(), e.getValue(), this);
        }
    }

    /**
     * add an info field to the record
     *
     * @param m A map from info keys to info values
     */
    public void addInfoFields(Map<String,String> m) {
        for ( Map.Entry<String, String> e : m.entrySet() )
            addInfoField(e.getKey(), e.getValue());
    }

    /**
     * the generation of a string representation, which is used by the VCF writer
     *
     * @param header the VCF header for this VCF Record
     * @return a string
     */
    public String toStringEncoding(VCFHeader header) {
        return toStringEncoding(header, VCFGenotypeWriter.VALIDATION_STRINGENCY.STRICT);
    }

    /**
     * the generation of a string representation, which is used by the VCF writer
     *
     * NOTE(review): validationStringency is currently unused -- the try/catch
     * that consulted it is commented out below, so encoding failures always
     * propagate regardless of stringency.
     *
     * @param header the VCF header for this VCF Record
     * @param validationStringency the validation stringency
     * @return a string
     */
    public String toStringEncoding(VCFHeader header, VCFGenotypeWriter.VALIDATION_STRINGENCY validationStringency) {
        StringBuilder builder = new StringBuilder();
        // CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
        builder.append(mLoc.getContig());
        builder.append(FIELD_SEPERATOR);
        builder.append(mLoc.getStart());
        builder.append(FIELD_SEPERATOR);
        builder.append(getID());
        builder.append(FIELD_SEPERATOR);
        builder.append(getReference());
        builder.append(FIELD_SEPERATOR);
        List<VCFGenotypeEncoding> alts = getAlternateAlleles();
        if ( alts.size() > 0 ) {
            // comma-join the alternate alleles
            builder.append(alts.get(0));
            for ( int i = 1; i < alts.size(); i++ ) {
                builder.append(",");
                builder.append(alts.get(i));
            }
        } else {
            builder.append(EMPTY_ALLELE_FIELD);
        }
        builder.append(FIELD_SEPERATOR);
        // missing qual is emitted as the raw -1 sentinel, otherwise 2-decimal format
        if ( (int)mQual == MISSING_GENOTYPE_QUALITY )
            builder.append(MISSING_GENOTYPE_QUALITY);
        else
            builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING, mQual));
        builder.append(FIELD_SEPERATOR);
        builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes()));
        builder.append(FIELD_SEPERATOR);
        builder.append(createInfoString());
        if ( mGenotypeFormatString != null && mGenotypeFormatString.length() > 0 ) {
            //            try {
            addGenotypeData(builder, header);
            //            } catch (Exception e) {
            //                if ( validationStringency == VCFGenotypeWriter.VALIDATION_STRINGENCY.STRICT )
            //                    throw new RuntimeException(e);
            //            }
        }
        return builder.toString();
    }

    /**
     * create the info string
     *
     * @return a string representing the infomation fields
     */
    protected String createInfoString() {
        StringBuffer info = new StringBuffer();
        boolean isFirst = true;
        for (String str : mInfoFields.keySet()) {
            if ( isFirst )
                isFirst = false;
            else
                info.append(INFO_FIELD_SEPERATOR);
            info.append(str);
            info.append("=");
            info.append(mInfoFields.get(str));
        }
        return info.length() == 0 ? EMPTY_INFO_FIELD : info.toString();
    }

    /**
     * add the genotype data
     *
     * Emits the FORMAT column followed by one column per header sample, in
     * header order; samples absent from this record get an empty-genotype
     * encoding.  Both mismatch directions (more records than header samples,
     * or records not named in the header) throw IllegalStateException.
     *
     * NOTE(review): the error path for "too many records" mutates
     * header.getGenotypeSamples() (removes matched samples) before throwing.
     *
     * @param builder the string builder
     * @param header the header object
     */
    private void addGenotypeData(StringBuilder builder, VCFHeader header) {
        Map<String, VCFGenotypeRecord> gMap = genotypeListToMap(getVCFGenotypeRecords());

        StringBuffer tempStr = new StringBuffer();
        if ( header.getGenotypeSamples().size() < getVCFGenotypeRecords().size() ) {
            for ( String sample : gMap.keySet() ) {
                if ( !header.getGenotypeSamples().contains(sample) )
                    System.err.println("Sample " + sample + " is a duplicate or is otherwise not present in the header");
                else
                    header.getGenotypeSamples().remove(sample);
            }
            throw new IllegalStateException("We have more genotype samples than the header specified; please check that samples aren't duplicated");
        }
        tempStr.append(FIELD_SEPERATOR + mGenotypeFormatString);

        String[] genotypeFormatStrings = mGenotypeFormatString.split(":");

        for ( String genotype : header.getGenotypeSamples() ) {
            tempStr.append(FIELD_SEPERATOR);
            if ( gMap.containsKey(genotype) ) {
                VCFGenotypeRecord rec = gMap.get(genotype);
                tempStr.append(rec.toStringEncoding(mAlts, genotypeFormatStrings));
                gMap.remove(genotype);
            } else {
                // sample declared in the header but absent here: emit missing values
                tempStr.append(VCFGenotypeRecord.stringEncodingForEmptyGenotype(genotypeFormatStrings));
            }
        }
        if ( gMap.size() != 0 ) {
            for ( String sample : gMap.keySet() )
                System.err.println("Sample " + sample + " is being genotyped but isn't in the header.");
            throw new IllegalStateException("We failed to use all the genotype samples; there must be an inconsistancy between the header and records");
        }

        builder.append(tempStr);
    }

    /**
     * compare two VCF records
     *
     * NOTE(review): this OVERLOADS rather than overrides Object.equals()
     * (parameter is VCFRecord, not Object) and there is no matching
     * hashCode(), so collections will not use it.  It also dereferences mID
     * directly and so NPEs when this record's ID is null (getID() tolerates
     * null, this method does not) -- confirm callers always set an ID.
     *
     * @param other the other VCF record
     * @return true if they're equal
     */
    public boolean equals(VCFRecord other) {
        if (!this.mAlts.equals(other.mAlts)) return false;
        if (!this.mReferenceBases.equals(other.mReferenceBases)) return false;
        if (!this.mLoc.equals(other.mLoc)) return false;
        if (!this.mID.equals(other.mID)) return false;
        if (this.mQual != other.mQual) return false;
        if ( this.mFilterString == null ) {
            if ( other.mFilterString != null ) return false;
        } else if ( !this.mFilterString.equals(other.mFilterString) ) return false;
        if (!this.mInfoFields.equals(other.mInfoFields)) return false;
        if (!this.mGenotypeRecords.equals(other.mGenotypeRecords)) return false;
        return true;
    }

    /**
     * create a genotype mapping from a list and their sample names
     *
     * @param list a list of genotype samples
     * @return a mapping of the sample name to VCF genotype record
     */
    private static Map<String, VCFGenotypeRecord> genotypeListToMap(List<VCFGenotypeRecord> list) {
        Map<String, VCFGenotypeRecord> map = new HashMap<String, VCFGenotypeRecord>();
        for (int i = 0; i < list.size(); i++) {
            VCFGenotypeRecord rec = list.get(i);
            map.put(rec.getSampleName(), rec);
        }
        return map;
    }
}

View File

@ -25,10 +25,11 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.Utils;
@ -48,7 +49,7 @@ public class VCFUtils {
for ( ReferenceOrderedDataSource source : toolkit.getRodDataSources() ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getType().equals(RodVCF.class) ) {
if ( rod.getType().equals(VCFRecord.class) ) {
vcfs.add(rod);
}
}
@ -72,7 +73,7 @@ public class VCFUtils {
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getType().equals(RodVCF.class) ) {
if ( rod.getType().equals(VCFCodec.class) ) {
VCFReader reader = new VCFReader(rod.getFile());
fields.addAll(reader.getHeader().getMetaData());
reader.close();
@ -89,7 +90,7 @@ public class VCFUtils {
* @param rodNamesToSampleNames mapping of rod/sample pairs to new uniquified sample names
* @return the new merged vcf record
*/
public static VCFRecord mergeRecords(List<RodVCF> rods, Map<Pair<String, String>, String> rodNamesToSampleNames) {
public static VCFRecord mergeRecords(Map<VCFRecord,String> rods, Map<Pair<String, String>, String> rodNamesToSampleNames) {
VCFParameters params = new VCFParameters();
params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY);
@ -100,14 +101,14 @@ public class VCFUtils {
Map<String, String> infoFields = new HashMap<String, String>();
List<String> filters = new ArrayList<String>();
for ( RodVCF rod : rods ) {
for ( VCFRecord rod : rods.keySet() ) {
List<VCFGenotypeRecord> myGenotypes = rod.getVCFGenotypeRecords();
for ( VCFGenotypeRecord call : myGenotypes ) {
// set the name to be the new uniquified name and add it to the list of genotypes
call.setSampleName(rodNamesToSampleNames.get(new Pair<String, String>(rod.getName(), call.getSampleName())));
call.setSampleName(rodNamesToSampleNames.get(new Pair<String, String>(rods.get(rod), call.getSampleName())));
if ( params.getPosition() < 1 )
params.setLocations(rod.getLocation(), call.getReference());
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord));
params.setLocations(GenomeLocParser.createGenomeLoc(rod.getChr(), rod.getStart()), call.getReference());
params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod));
}
// set the overall confidence to be the max entry we see

View File

@ -1,6 +1,10 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import java.io.*;
import java.util.TreeSet;
@ -97,7 +101,7 @@ public class VCFWriter {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added");
String vcfString = record.toStringEncoding(mHeader, validationStringency);
String vcfString = record.toStringEncoding(mHeader);
try {
mWriter.write(vcfString + "\n");
mWriter.flush(); // necessary so that writing to an output stream will work

View File

@ -1,181 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.junit.Assert;
import static org.junit.Assert.fail;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* @author aaron
* <p/>
* Class RodVCFUnitTest
* <p/>
* test out the rod VCF
*/
public class RodVCFUnitTest extends BaseTest {
private static File vcfFile = new File(validationDataLocation + "vcfexample.vcf");
private VCFHeader mHeader;
@BeforeClass
public static void beforeTests() {
IndexedFastaSequenceFile seq;
try {
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
} catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary");
}
GenomeLocParser.setupRefContigOrdering(seq);
}
private RodVCF getVCFObject() {
RodVCF vcf = new RodVCF("VCF");
mHeader = null;
try {
mHeader = (VCFHeader) vcf.initialize(vcfFile);
} catch (FileNotFoundException e) {
fail("Unable to open VCF file");
}
mHeader.checkVCFVersion();
return vcf;
}
@Test
public void testInitialize() {
getVCFObject();
}
@Test
public void testToIterator() {
RodVCF vcf = getVCFObject();
VCFReader reader = new VCFReader(vcfFile);
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
while (iter.hasNext()) {
VCFRecord rec1 = reader.next();
VCFRecord rec2 = iter.next().mCurrentRecord;
if (!rec1.equals(rec2)) {
fail("VCF record rec1 != rec2");
}
}
}
@Test
public void testToMAF() {
RodVCF vcf = getVCFObject();
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
RodVCF rec = iter.next();
Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.786, 0.00001);
rec = iter.next();
rec = iter.next();
rec = iter.next();
Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.0, 0.00001);
}
@Test
public void testToString() {
// slightly altered line, due to map ordering
final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tAF=0.786;DP=258;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:\n";
RodVCF vcf = getVCFObject();
VCFReader reader = new VCFReader(vcfFile);
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
boolean first = true;
while (iter.hasNext()) {
VCFRecord rec1 = reader.next();
VCFRecord rec2 = iter.next().mCurrentRecord;
if (!rec1.toStringEncoding(mHeader).equals(rec2.toStringEncoding(mHeader))) {
fail("VCF record rec1.toStringEncoding() != rec2.toStringEncoding()");
}
// verify the first line too
if (first) {
if (!firstLine.equals(rec1.toStringEncoding(mHeader) + "\n")) {
fail("VCF record rec1.toStringEncoding() != expected string :\n" + rec1.toStringEncoding(mHeader) + "\n" + firstLine);
}
first = false;
}
}
}
@Test
public void testInsertion() {
RodVCF vcf = getVCFObject();
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
RodVCF rec = iter.next();
Assert.assertTrue(rec.isSNP());
rec = iter.next();
rec = iter.next();
rec = iter.next();
rec = iter.next();
Assert.assertTrue(rec.isIndel());
Assert.assertTrue(rec.isDeletion());
}
@Test
public void testDeletion() {
RodVCF vcf = getVCFObject();
Iterator<RodVCF> iter = vcf.createIterator("VCF", vcfFile);
RodVCF rec = iter.next();
Assert.assertTrue(rec.isSNP());
rec = iter.next();
rec = iter.next();
rec = iter.next();
rec = iter.next();
rec = iter.next();
Assert.assertTrue(rec.isIndel());
Assert.assertTrue(rec.isInsertion());
}
@Test
public void testGetGenotypes() {
    // Record #1 has ref 'G' and alt 'A'; its three samples should cover the
    // genotypes GG, AG and AA (in some order).
    RodVCF rod = getVCFObject();
    Iterator<RodVCF> records = rod.createIterator("VCF", vcfFile);
    List<VCFGenotypeRecord> genotypes = records.next().getVCFGenotypeRecords();
    Assert.assertEquals(3, genotypes.size());
    List<String> expectedBases = new ArrayList<String>();
    expectedBases.add("GG");
    expectedBases.add("AG");
    expectedBases.add("AA");
    for (VCFGenotypeRecord genotype : genotypes) {
        Assert.assertTrue(expectedBases.contains(genotype.getBases()));
    }
}
@Test
public void testGetGenotypesQual() {
    // Genotype qualities of record #1's three samples, expressed as
    // -log10(Perror): 4.8, 4.8 and 4.3.
    RodVCF rod = getVCFObject();
    Iterator<RodVCF> records = rod.createIterator("VCF", vcfFile);
    List<VCFGenotypeRecord> genotypes = records.next().getVCFGenotypeRecords();
    double[] expectedNegLog10 = {4.8, 4.8, 4.3};
    for (int i = 0; i < expectedNegLog10.length; i++) {
        Assert.assertEquals(expectedNegLog10[i], genotypes.get(i).getNegLog10PError(), 0.0001);
    }
}
@Test
public void testGetLocation() {
    // Record #1 sits at 20:14370; a single-base variant spans one position,
    // so start and stop are equal.
    RodVCF rod = getVCFObject();
    Iterator<RodVCF> records = rod.createIterator("VCF", vcfFile);
    GenomeLoc location = records.next().getLocation();
    Assert.assertEquals("20", location.getContig());
    Assert.assertEquals(14370, location.getStart());
    Assert.assertEquals(14370, location.getStop());
}
}

View File

@ -27,7 +27,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
public void testSimpleVCFPileup() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString1KG() + " -B eval,VCF," + validationDataLocation + "MultiSample.vcf", 1,
Arrays.asList("ff4da30a3b6428bc64d61214c715efa5"));
Arrays.asList("5d84c75746738833b6c9441d9d614553"));
executeTest("testSimpleVCFPileup", spec);
}
@ -37,7 +37,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
baseTestString1KG() + " -B eval,VCF," + validationDataLocation + "MultiSample.vcf" +
" -B eval,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf"
, 1,
Arrays.asList("52f758a70ae603213d4399220151357d"));
Arrays.asList("6dd0ed0a6fe7096ccb66beffb8d455da"));
executeTest("testComplexVCFPileup", spec);
}
@ -49,7 +49,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest {
" -B eval,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf" +
" -L 1 -L 2 -L 20"
, 1,
Arrays.asList("421a27be91496e88f2f4e197dcfad0ff"));
Arrays.asList("ab3da32eae65e8c15a9f4a787a190a37"));
executeTest("testLargeComplexVCFPileup", spec);
}
}

View File

@ -1,151 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.BaseTest;
import org.junit.Assert;
import org.junit.Test;
/**
* @author aaron
* <p/>
* Class VCFGenotypeEncodingUnitTest
* <p/>
* test the VCFGenotypeEncoding class
*/
public class VCFGenotypeEncodingUnitTest extends BaseTest {

    /**
     * Assert that {@code input} decodes to a SINGLE_BASE encoding whose string
     * form is {@code expected}.  Single-base encodings report a length of 0
     * in this implementation.
     *
     * @param input    the raw encoding string handed to the constructor
     * @param expected the expected (upper-cased) toString() result
     */
    private static void assertSingleBase(String input, String expected) {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding(input);
        Assert.assertTrue(expected.equals(enc.toString()));
        Assert.assertEquals(0, enc.getLength());
        Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType());
    }

    /**
     * All four bases decode as SINGLE_BASE, and lower-case input is normalized
     * to upper case.  Bug fix: the original checked getLength()/getType() on
     * the first encoding for every case instead of on the encoding under test,
     * so seven of the eight cases were never actually verified.
     */
    @Test
    public void testDecodingSingle() {
        assertSingleBase("A", "A");
        assertSingleBase("C", "C");
        assertSingleBase("G", "G");
        assertSingleBase("T", "T");
        assertSingleBase("a", "A");
        assertSingleBase("c", "C");
        assertSingleBase("g", "G");
        assertSingleBase("t", "T");
    }

    /** A non-nucleotide character is rejected. */
    @Test(expected = IllegalArgumentException.class)
    public void testDecodingSingleBadBase() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding("E");
    }

    /** A bare 'I' is not a valid single-base encoding. */
    @Test(expected = IllegalArgumentException.class)
    public void testDecodingSingleWrongBase() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding("I");
    }

    /** "I" followed by bases encodes an insertion of those bases. */
    @Test
    public void testValidIndel() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGGC");
        Assert.assertEquals(4, enc.getLength());
        Assert.assertTrue(enc.getBases().equals("AGGC"));
        Assert.assertEquals(VCFGenotypeEncoding.TYPE.INSERTION, enc.getType());
    }

    /** An insertion containing a non-base character ('R') is rejected. */
    @Test(expected = IllegalArgumentException.class)
    public void testBadIndel() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGRC");
    }

    /** "D" followed by a number encodes a deletion of that many bases. */
    @Test
    public void testValidDel() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding("D40");
        Assert.assertEquals(40, enc.getLength());
        Assert.assertTrue(enc.getBases().equals(""));
        Assert.assertEquals(VCFGenotypeEncoding.TYPE.DELETION, enc.getType());
    }

    /** A deletion must be followed by a count, not by bases. */
    @Test(expected = IllegalArgumentException.class)
    public void testBadDel() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding("DAGCT");
    }

    /** A single '.' encodes an uncalled allele. */
    @Test
    public void testValidNoCall() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding(".");
        Assert.assertEquals(0, enc.getLength());
        Assert.assertTrue(enc.getBases().equals("."));
        Assert.assertEquals(VCFGenotypeEncoding.TYPE.UNCALLED, enc.getType());
    }

    /** Multiple dots are not a valid no-call. */
    @Test(expected = IllegalArgumentException.class)
    public void testBadNoCall() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding("..");
    }

    /** equals() must hold for identical encodings of every type and fail otherwise. */
    @Test
    public void testEquals() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A");
        VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A");
        VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C");
        Assert.assertTrue(enc.equals(enc2));
        Assert.assertTrue(!enc.equals(enc3));
        enc = new VCFGenotypeEncoding("D40");
        enc2 = new VCFGenotypeEncoding("D40");
        enc3 = new VCFGenotypeEncoding("D41");
        Assert.assertTrue(enc.equals(enc2));
        Assert.assertTrue(!enc.equals(enc3));
        enc = new VCFGenotypeEncoding("IAAC");
        enc2 = new VCFGenotypeEncoding("IAAC");
        enc3 = new VCFGenotypeEncoding("IACG");
        Assert.assertTrue(enc.equals(enc2));
        Assert.assertTrue(!enc.equals(enc3));
        enc = new VCFGenotypeEncoding(".");
        enc2 = new VCFGenotypeEncoding(".");
        Assert.assertTrue(enc.equals(enc2));
    }

    /** hashCode() must be consistent with equals() for every encoding type. */
    @Test
    public void testHashCode() {
        VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A");
        VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A");
        VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C");
        Assert.assertTrue(enc.hashCode() == enc2.hashCode());
        Assert.assertTrue(enc.hashCode() != enc3.hashCode());
        enc = new VCFGenotypeEncoding("D40");
        enc2 = new VCFGenotypeEncoding("D40");
        enc3 = new VCFGenotypeEncoding("D41");
        Assert.assertTrue(enc.hashCode() == enc2.hashCode());
        Assert.assertTrue(enc.hashCode() != enc3.hashCode());
        enc = new VCFGenotypeEncoding("IAAC");
        enc2 = new VCFGenotypeEncoding("IAAC");
        enc3 = new VCFGenotypeEncoding("IACG");
        Assert.assertTrue(enc.hashCode() == enc2.hashCode());
        Assert.assertTrue(enc.hashCode() != enc3.hashCode());
        enc = new VCFGenotypeEncoding(".");
        enc2 = new VCFGenotypeEncoding(".");
        Assert.assertTrue(enc.hashCode() == enc2.hashCode());
    }
}

View File

@ -1,52 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.BaseTest;
import org.junit.Assert;
import org.junit.Test;
import java.util.*;
/**
*
* @author aaron
*
* Class VCFHeaderUnitTest
*
* Test the VCF Header class
*/
public class VCFHeaderUnitTest extends BaseTest {
    private Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
    private Set<String> additionalColumns = new HashSet<String>();

    /**
     * Build a header from fake metadata and extra sample columns, then verify
     * the header reports back exactly what it was given.
     */
    @Test
    public void testHeaderConstructor() {
        metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
        metaData.add(new VCFHeaderLine("two", "2"));
        additionalColumns.add("extra1");
        additionalColumns.add("extra2");
        // this should create a header that is valid
        VCFHeader header = new VCFHeader(metaData, additionalColumns);

        // the standard header fields must come back in declaration order
        int fieldIndex = 0;
        for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
            Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[fieldIndex], field);
            fieldIndex++;
        }

        // every metadata line survives the round trip
        Assert.assertEquals(metaData.size(), header.getMetaData().size());

        // each genotype sample is one of the extra columns, and none are lost
        int sampleCount = 0;
        for (String sample : header.getGenotypeSamples()) {
            Assert.assertTrue(additionalColumns.contains(sample));
            sampleCount++;
        }
        Assert.assertEquals(additionalColumns.size(), sampleCount);
    }
}

View File

@ -1,18 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.Arrays;
public class VCFIntegrationTest extends WalkerTest {

    /**
     * Read in and then emit each record of the complex example VCF, comparing
     * the emitted output against a known md5.
     */
    @Test
    public void test1() {
        String command = "-T PrintRODs -R " + oneKGLocation + "reference/human_b36_both.fasta -L 1:10,000,000-10,050,000 -o %s -B vcf,VCF," + validationDataLocation + "complexExample.vcf";
        WalkerTestSpec spec = new WalkerTestSpec(command, 1, Arrays.asList("68b123acca4975553297fcd776c70464"));
        executeTest("test vcf", spec);
    }
}

View File

@ -1,366 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
/** test the VCFReader class test */
/**
 * Tests for the VCFReader: record counting, no-call and known-call genotype
 * detection, genotype-field decoding, and a record-by-record walk through a
 * complex example file.
 */
public class VCFReaderUnitTest extends BaseTest {
    private static final File vcfFile = new File(validationDataLocation + "vcfexample.vcf");
    private static final File multiSampleVCF = new File(validationDataLocation + "MultiSample.vcf");
    private static final String VCF_MIXUP_FILE = validationDataLocation + "mixedup.v2.vcf";
    private static final File complexFile = new File(validationDataLocation + "complexExample.vcf");
    private static final File headerNoRecordsFile = new File(validationDataLocation + "headerNoRecords.vcf");
    private static final File headerSampleSpaceFile = new File(validationDataLocation + "headerSampleSpaceFile.vcf");

    /** Establish the reference contig ordering required for location parsing. */
    @BeforeClass
    public static void beforeTests() {
        try {
            IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
            GenomeLocParser.setupRefContigOrdering(seq);
        } catch (FileNotFoundException e) {
            throw new StingException("unable to load the sequence dictionary");
        }
    }

    /** The single-sample example file holds exactly 6 records. */
    @Test
    public void testVCFInput() {
        VCFReader reader = new VCFReader(vcfFile);
        int counter = 0;
        while (reader.hasNext()) {
            counter++;
            reader.next();
        }
        Assert.assertEquals(6, counter);
    }

    /** The multi-sample example file holds exactly 99 records. */
    @Test
    public void testMultiSampleVCFInput() {
        VCFReader reader = new VCFReader(multiSampleVCF);
        int counter = 0;
        while (reader.hasNext()) {
            counter++;
            reader.next();
        }
        Assert.assertEquals(99, counter);
    }

    /** The first multi-sample record contains exactly 9 no-call genotypes. */
    @Test
    public void testNoCallSites() {
        VCFReader reader = new VCFReader(multiSampleVCF);
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        VCFRecord rec = reader.next();
        final int numberOfNoCallsTruth = 9;
        int noCalls = 0;
        for (VCFGenotypeRecord record : rec.getVCFGenotypeRecords()) {
            List<VCFGenotypeEncoding> encodings = record.getAlleles();
            // both alleles uncalled => a no-call genotype
            if (encodings.get(0).getType() == VCFGenotypeEncoding.TYPE.UNCALLED &&
                    encodings.get(1).getType() == VCFGenotypeEncoding.TYPE.UNCALLED)
                noCalls++;
        }
        Assert.assertEquals(numberOfNoCallsTruth, noCalls);
    }

    /** Two specific samples of the first multi-sample record have known genotypes. */
    @Test
    public void testKnownCallSites() {
        VCFReader reader = new VCFReader(multiSampleVCF);
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        VCFRecord rec = reader.next();
        boolean seenNA11992 = false;
        boolean seenNA12287 = false;
        for (VCFGenotypeRecord record : rec.getVCFGenotypeRecords()) {
            if (record.getSampleName().equals("NA11992")) {
                List<VCFGenotypeEncoding> encodings = record.getAlleles();
                if (!encodings.get(0).getBases().equals("A") ||
                        !encodings.get(1).getBases().equals("A")) {
                    Assert.fail("Sample NA11992 at site 1:10000005 should be AA");
                }
                seenNA11992 = true;
            }
            if (record.getSampleName().equals("NA12287")) {
                List<VCFGenotypeEncoding> encodings = record.getAlleles();
                if (!encodings.get(0).getBases().equals("A") ||
                        !encodings.get(1).getBases().equals("T")) {
                    // bug fix: the original message named the wrong sample and genotype
                    Assert.fail("Sample NA12287 at site 1:10000005 should be AT");
                }
                seenNA12287 = true;
            }
        }
        Assert.assertTrue(seenNA11992 && seenNA12287);
    }

    /** test the basic parsing of a fully-populated genotype field */
    @Test
    public void testBasicParsing() {
        String formatString = "GT:B:C:D";
        String genotypeString = "0|1:2:3:4";
        String altAlleles[] = {"A", "G", "T"};
        char referenceBase = 'C';
        VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test", formatString, genotypeString, altAlleles, referenceBase);
        // 0|1 => phased ref/first-alt genotype: C then A
        Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED, rec.getPhaseType());
        Assert.assertEquals("C", rec.getAlleles().get(0).toString());
        Assert.assertEquals("A", rec.getAlleles().get(1).toString());
        Map<String, String> values = rec.getFields();
        Assert.assertEquals(3, values.size());
        Assert.assertTrue(values.get("B").equals("2"));
        Assert.assertTrue(values.get("C").equals("3"));
        Assert.assertTrue(values.get("D").equals("4"));
    }

    /** test the parsing of a genotype field with missing parameters */
    @Test
    public void testMissingFieldParsing() {
        String formatString = "GT:B:C:D";
        String genotypeString = "0|1:::4";
        String altAlleles[] = {"A", "G", "T"};
        char referenceBase = 'C';
        VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test", formatString, genotypeString, altAlleles, referenceBase);
        Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED, rec.getPhaseType());
        Assert.assertEquals("C", rec.getAlleles().get(0).toString());
        Assert.assertEquals("A", rec.getAlleles().get(1).toString());
        Map<String, String> values = rec.getFields();
        Assert.assertEquals(3, values.size());
        // empty positions come back as empty strings, not nulls
        Assert.assertTrue(values.get("B").equals(""));
        Assert.assertTrue(values.get("C").equals(""));
        Assert.assertTrue(values.get("D").equals("4"));
    }

    /** test the parsing of a genotype field with every non-GT parameter missing */
    @Test
    public void testMissingAllFields() {
        String formatString = "GT:B:C:D";
        String genotypeString = "0|1:::";
        String altAlleles[] = {"A", "G", "T"};
        char referenceBase = 'C';
        VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test", formatString, genotypeString, altAlleles, referenceBase);
        Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED, rec.getPhaseType());
        Assert.assertEquals("C", rec.getAlleles().get(0).toString());
        Assert.assertEquals("A", rec.getAlleles().get(1).toString());
        Map<String, String> values = rec.getFields();
        Assert.assertEquals(3, values.size());
        Assert.assertTrue(values.get("B").equals(""));
        Assert.assertTrue(values.get("C").equals(""));
        Assert.assertTrue(values.get("D").equals(""));
    }

    /**
     * Parse a VCF whose genotype columns are out of order and verify each
     * sample's re-encoded genotype matches the raw text column under the same
     * header position.
     */
    @Test
    public void testKiransOutOfOrderExample() {
        ArrayList<String> header = new ArrayList<String>();
        ArrayList<String> variant = new ArrayList<String>();
        try {
            FileReader reader = new FileReader(VCF_MIXUP_FILE);
            BufferedReader breader = new BufferedReader(reader);
            String line;
            while ((line = breader.readLine()) != null) {
                String[] pieces = line.split("\t");
                if (line.contains("##")) {
                    continue; // skip metadata lines
                } else if (line.contains("#CHROM")) {
                    for (int i = 0; i < pieces.length; i++) {
                        header.add(i, pieces[i]);
                    }
                } else {
                    for (int i = 0; i < pieces.length; i++) {
                        variant.add(i, pieces[i]);
                    }
                }
            }
        } catch (FileNotFoundException e) {
            Assert.fail("Unable to open the mixed up VCF file.");
        } catch (IOException e) {
            Assert.fail("Unable to read from the mixed up VCF file.");
        }
        // now load up a ROD and compare column-by-column against the raw text
        Iterator<RodVCF> rod = RodVCF.createIterator("name",new File(VCF_MIXUP_FILE));
        RodVCF newRod = rod.next();
        List<VCFGenotypeRecord> records = newRod.mCurrentRecord.getVCFGenotypeRecords();
        for (VCFGenotypeRecord record: records) {
            if (!header.contains(record.getSampleName())) {
                Assert.fail("Parsed header doesn't contain field " + record.getSampleName());
            }
            if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":")))) {
                Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":"))
                        + " != " + variant.get(header.indexOf(record.getSampleName())));
            }
        }
    }

    /** Walk all 12 records of the complex example file, checking a different feature at each. */
    @Test
    public void testComplexExample() {
        VCFReader reader = new VCFReader(complexFile);
        // test header: count the typed metadata lines
        Set<VCFHeaderLine> headerLines = reader.getHeader().getMetaData();
        int formatCount = 0;
        int infoCount = 0;
        int filterCount = 0;
        for ( VCFHeaderLine line : headerLines ) {
            if ( line instanceof VCFFormatHeaderLine )
                formatCount++;
            else if ( line instanceof VCFInfoHeaderLine )
                infoCount++;
            else if ( line instanceof VCFFilterHeaderLine )
                filterCount++;
        }
        Assert.assertEquals(3, formatCount);
        Assert.assertEquals(4, infoCount);
        Assert.assertEquals(0, filterCount);
        // record #1: test dbsnp id and empty-genotype count
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        VCFRecord rec = reader.next();
        Assert.assertTrue(rec.getID().equals("testid1"));
        List<VCFGenotypeRecord> grecords = rec.getVCFGenotypeRecords();
        int counter = 0;
        for ( VCFGenotypeRecord grec : grecords ) {
            if ( grec.isEmptyGenotype() )
                counter++;
        }
        Assert.assertEquals(2, counter);
        // record #2: test info field
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        Assert.assertTrue(rec.getInfoValues().get("RMSMAPQ").equals("81.91"));
        // record #3: test alternate alleles (order is not guaranteed)
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        List<String> alts = rec.getAlternateAlleleList();
        Assert.assertTrue((alts.get(0).equals("T") && alts.get(1).equals("C")) || (alts.get(0).equals("C") && alts.get(1).equals("T")));
        // record #4: test max GQ value and read count
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        grecords = rec.getVCFGenotypeRecords();
        for ( VCFGenotypeRecord grec : grecords ) {
            if ( !grec.isEmptyGenotype() ) {
                Assert.assertTrue(grec.getFields().get("GQ").equals("2000000"));
                Assert.assertEquals(7, grec.getReadCount());
            }
        }
        // record #5: test more alternate alleles
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        alts = rec.getAlternateAlleleList();
        // bug fix: expected value goes first in assertEquals
        Assert.assertEquals(3, alts.size());
        grecords = rec.getVCFGenotypeRecords();
        boolean[] seen = new boolean[3];
        for ( VCFGenotypeRecord grec : grecords ) {
            if ( grec.getBases().equals("CC") ) {
                Assert.assertEquals(27, grec.getReadCount());
                seen[0] = true;
            } else if ( grec.getBases().equals("AA") ) {
                Assert.assertEquals(26, grec.getReadCount());
                seen[1] = true;
            } else if ( grec.getBases().equals("GT") || grec.getBases().equals("TG") ) {
                Assert.assertEquals(16, grec.getReadCount());
                seen[2] = true;
            }
        }
        Assert.assertTrue(seen[0] && seen[1] && seen[2]);
        // record #6: toVariation
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        grecords = rec.getVCFGenotypeRecords();
        for ( VCFGenotypeRecord grec : grecords ) {
            if ( !grec.isEmptyGenotype() ) {
                Assert.assertTrue(grec.isVariant(rec.getReference().charAt(0)));
                Assert.assertEquals(rec, grec.getRecord());
            }
        }
        // record #7: default values, isVariation, and isHom/isHet
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        grecords = rec.getVCFGenotypeRecords();
        for ( VCFGenotypeRecord grec : grecords ) {
            if ( !grec.isVariant(rec.getReference().charAt(0)) ) {
                Assert.assertTrue(grec.isHom());
                Assert.assertTrue(grec.getFields().get("GQ").equals("-1"));
                Assert.assertEquals(-1, grec.getReadCount());
            } else {
                Assert.assertTrue(grec.isHet());
                if ( grec.getReadCount() == 4 )
                    Assert.assertTrue(grec.getFields().get("GQ").equals("-1"));
                else
                    Assert.assertTrue(grec.getFields().get("GQ").equals("5.85") && grec.getReadCount() == -1);
            }
        }
        // record #8: filtered and type
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        Assert.assertTrue(!rec.isFiltered());
        Assert.assertTrue(rec.getFilterString().equals("."));
        Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, rec.getType());
        // record #9: deletion
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        Assert.assertEquals(VCFGenotypeEncoding.TYPE.DELETION, rec.getType());
        Assert.assertEquals(1, rec.getAlternateAlleleList().size());
        Assert.assertTrue(rec.getAlternateAlleleList().get(0).equals(""));
        // record #10: insertion
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        Assert.assertEquals(VCFGenotypeEncoding.TYPE.INSERTION, rec.getType());
        // bug fix: expected value goes first in assertEquals
        Assert.assertEquals(1, rec.getAlternateAlleleList().size());
        Assert.assertTrue(rec.getAlternateAlleleList().get(0).equals("CAT"));
        // record #11: more filters
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        Assert.assertTrue(rec.isFiltered());
        Assert.assertTrue(rec.getFilterString().equals("foo"));
        // record #12: yet more filters
        if (!reader.hasNext()) Assert.fail("The reader should have a record");
        rec = reader.next();
        Assert.assertTrue(rec.isFiltered());
        Assert.assertTrue(rec.getFilterString().equals("bar;baz"));
        if (reader.hasNext()) Assert.fail("The reader should NOT have a record");
    }

    /** A header-only file parses its metadata but yields no records. */
    @Test
    public void testHeaderNoRecords() {
        VCFReader reader = new VCFReader(headerNoRecordsFile);
        Assert.assertTrue(reader.getHeader().getMetaData() != null);
        Assert.assertTrue(!reader.iterator().hasNext());
    }

    /** A sample name containing a space must be preserved intact. */
    @Test
    public void testHeaderSampleSpaceFile() {
        VCFReader reader = new VCFReader(headerSampleSpaceFile);
        Assert.assertTrue(reader.getHeader().hasGenotypingData());
        Assert.assertTrue(reader.getHeader().getGenotypeSamples().size() == 1);
        Assert.assertTrue(reader.getHeader().getGenotypeSamples().contains("SAMPLE NAME"));
    }
}

View File

@ -1,169 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.junit.Assert;
import org.junit.Test;
import org.junit.BeforeClass;
import java.util.*;
import java.io.File;
import java.io.FileNotFoundException;
/**
* @author aaron
* <p/>
* Class VCFRecordUnitTest
* <p/>
* test the basic functionality of the vcf record
*/
public class VCFRecordUnitTest extends BaseTest {
    // Load the hg18 reference dictionary once so GenomeLoc parsing works.
    @BeforeClass
    public static void beforeTests() {
        try {
            IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
            GenomeLocParser.setupRefContigOrdering(seq);
        } catch (FileNotFoundException e) {
            throw new StingException("unable to load the sequence dictionary");
        }
    }
    /**
     * create a fake VCF record: ref "A" at chr1:1, id "RANDOM", alt alleles
     * "C" and a 1-base deletion, qual 0, unfiltered, format "GT:AA", and a
     * single hom-ref sample "sample1".
     *
     * @param infoFields the info fields
     * @return a VCFRecord
     */
    private static VCFRecord makeFakeVCFRecord(Map<String, String> infoFields) {
        List<VCFGenotypeEncoding> altBases = new ArrayList<VCFGenotypeEncoding>();
        altBases.add(new VCFGenotypeEncoding("C"));
        altBases.add(new VCFGenotypeEncoding("D1"));
        List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
        genotypeObjects.add(createGenotype("sample1", "A", "A"));
        return new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects);
    }
    /**
     * create a fake phased VCF genotype record with an "AA" field set to "2"
     *
     * @param name the name of the sample
     * @param Allele1 the first allele
     * @param Allele2 the second allele
     * @return a VCFGenotypeRecord
     */
    private static VCFGenotypeRecord createGenotype(String name, String Allele1, String Allele2) {
        List<VCFGenotypeEncoding> Alleles = new ArrayList<VCFGenotypeEncoding>();
        Alleles.add(new VCFGenotypeEncoding(Allele1));
        Alleles.add(new VCFGenotypeEncoding(Allele2));
        VCFGenotypeRecord rec = new VCFGenotypeRecord(name, Alleles, VCFGenotypeRecord.PHASE.PHASED);
        rec.setField("AA", "2");
        return rec;
    }
    // Duplicate alternate alleles passed to the constructor are collapsed.
    // (Method name typo "Reduntant" is preserved; renaming would alter the
    // published test name.)
    @Test
    public void testAddReduntantAlts() {
        List<VCFGenotypeEncoding> altBases = new ArrayList<VCFGenotypeEncoding>();
        altBases.add(new VCFGenotypeEncoding("C"));
        altBases.add(new VCFGenotypeEncoding("D1"));
        altBases.add(new VCFGenotypeEncoding("D1"));
        List<VCFGenotypeRecord> genotypeObjects = new ArrayList<VCFGenotypeRecord>();
        genotypeObjects.add(createGenotype("sample1", "A", "A"));
        VCFRecord rec = new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap<String,String>(), "GT:AA", genotypeObjects);
        Assert.assertEquals(2, rec.getAlternateAlleles().size());
    }
    // A record built with one sample returns exactly that sample's genotype.
    @Test
    public void testGetOneGenotype() {
        Map<String, String> infoFields = new HashMap<String, String>();
        VCFRecord rec = makeFakeVCFRecord(infoFields);
        List<VCFGenotypeRecord> genotypeObjects = rec.getVCFGenotypeRecords();
        Assert.assertEquals(1, genotypeObjects.size());
        Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1"));
        Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size());
        Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString());
        Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString());
    }
    // Genotype records added after construction come back in insertion order.
    @Test
    public void testGetGenotypes() {
        Map<String, String> infoFields = new HashMap<String, String>();
        VCFRecord rec = makeFakeVCFRecord(infoFields);
        rec.addGenotypeRecord(createGenotype("sample2", "C", "A"));
        List<VCFGenotypeRecord> genotypeObjects = rec.getVCFGenotypeRecords();
        Assert.assertEquals(2, genotypeObjects.size());
        Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1"));
        Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size());
        Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString());
        Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString());
        // assert the second one
        Assert.assertTrue(genotypeObjects.get(1).getSampleName().equals("sample2"));
        Assert.assertEquals(2, genotypeObjects.get(1).getAlleles().size());
        Assert.assertEquals("C", genotypeObjects.get(1).getAlleles().get(0).toString());
        Assert.assertEquals("A", genotypeObjects.get(1).getAlleles().get(1).toString());
    }
    // INFO encoding: "." when empty, "key=value" pairs joined by ';' otherwise.
    // Both orders are accepted below because map iteration order is unspecified.
    @Test
    public void testCreateInfoString() {
        Map<String, String> infoFields = new HashMap<String, String>();
        VCFRecord rec = makeFakeVCFRecord(infoFields);
        Assert.assertTrue(rec.createInfoString().equals("."));
        infoFields.put("DP", "50");
        VCFRecord rec2 = makeFakeVCFRecord(infoFields);
        Assert.assertTrue(rec2.createInfoString().equals("DP=50"));
        rec2.addInfoField("AB", "CD");
        Assert.assertTrue(rec2.createInfoString().equals("DP=50;AB=CD") || rec2.createInfoString().equals("AB=CD;DP=50"));
    }
    // Repeatedly adding the same alternate base must not create duplicates:
    // C, D1 (from the fake record) plus one T = 3 distinct alternates.
    @Test
    public void testAddAlts() {
        Map<String, String> infoFields = new HashMap<String, String>();
        VCFRecord rec = makeFakeVCFRecord(infoFields);
        rec.addAlternateBase(new VCFGenotypeEncoding("T"));
        rec.addAlternateBase(new VCFGenotypeEncoding("T"));
        rec.addAlternateBase(new VCFGenotypeEncoding("T"));
        rec.addAlternateBase(new VCFGenotypeEncoding("T"));
        rec.addAlternateBase(new VCFGenotypeEncoding("T"));
        Assert.assertEquals(3,rec.getAlternateAlleles().size());
    }
    /**
     * create a fake header of known quantity: version + one custom metadata
     * line, with "FORMAT" and "sample1" as the extra columns
     *
     * @return a fake VCF header
     */
    public static VCFHeader createFakeHeader() {
        Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION));
        metaData.add(new VCFHeaderLine("two", "2"));
        Set<String> additionalColumns = new HashSet<String>();
        additionalColumns.add("FORMAT");
        additionalColumns.add("sample1");
        return new VCFHeader(metaData, additionalColumns);
    }
    // Expected tab-delimited encodings of the fake record, before and after
    // adding the AB=CD info field.
    private static final String stringRep = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tDP=50\tGT:AA\t0|0:2";
    private static final String stringRep2 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2";
    //private static final String stringRep3 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2";
    // Full-record encoding under the fake header matches the expected strings.
    // NOTE(review): stringRep2 assumes "AB=CD;DP=50" ordering even though
    // testCreateInfoString above accepts either order — this may be fragile.
    @Test
    public void testStringRepresentation() {
        Map<String, String> infoFields = new HashMap<String, String>();
        infoFields.put("DP", "50");
        VCFRecord rec = makeFakeVCFRecord(infoFields);
        String rep = rec.toStringEncoding(createFakeHeader());
        Assert.assertTrue(stringRep.equals(rep));
        rec.addInfoField("AB", "CD");
        String rep2 = rec.toStringEncoding(createFakeHeader());
        Assert.assertTrue(stringRep2.equals(rep2));
        //rec.addGenotypeField(createGenotype("sample3","A","D12"));
    }
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.StingException;