From a68f3b2e9cdf3a926941e83348a38a8330c5eb9f Mon Sep 17 00:00:00 2001 From: aaron Date: Wed, 5 May 2010 17:28:48 +0000 Subject: [PATCH] VCF moved over to tribble. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3302 348d0f76-0448-11de-a6fe-93d51630548a --- .../io/storage/GenotypeWriterStorage.java | 2 +- .../io/storage/VCFGenotypeWriterStorage.java | 6 +- .../gatk/io/stubs/VCFGenotypeWriterStub.java | 4 +- .../sting/gatk/refdata/RodVCF.java | 351 ---------- .../gatk/refdata/VariantContextAdaptors.java | 22 +- .../tracks/builders/RODTrackBuilder.java | 1 - .../builders/TribbleRMDTrackBuilder.java | 11 +- .../sting/gatk/walkers/VariantsToVCF.java | 8 +- .../gatk/walkers/annotator/Alignability.java | 2 +- .../gatk/walkers/annotator/AlleleBalance.java | 2 +- .../annotator/BaseQualityRankSumTest.java | 2 +- .../walkers/annotator/DepthOfCoverage.java | 4 +- .../annotator/DepthPerAlleleBySample.java | 2 +- .../gatk/walkers/annotator/GCContent.java | 2 +- .../walkers/annotator/HaplotypeScore.java | 3 +- .../gatk/walkers/annotator/HardyWeinberg.java | 2 +- .../walkers/annotator/HomopolymerRun.java | 2 +- .../sting/gatk/walkers/annotator/LowMQ.java | 2 +- .../annotator/MappingQualityRankSumTest.java | 2 +- .../walkers/annotator/MappingQualityZero.java | 2 +- .../gatk/walkers/annotator/QualByDepth.java | 3 +- .../QualityAdjustedSecondBaseLod.java | 2 +- .../walkers/annotator/RMSMappingQuality.java | 4 +- .../walkers/annotator/SecondBaseSkew.java | 2 +- .../walkers/annotator/SpanningDeletions.java | 2 +- .../walkers/annotator/VariantAnnotator.java | 8 +- .../annotator/VariantAnnotatorEngine.java | 6 +- .../interfaces/GenotypeAnnotation.java | 2 +- .../interfaces/InfoFieldAnnotation.java | 2 +- .../concordance/CallsetConcordanceWalker.java | 19 +- .../walkers/concordance/ConcordanceType.java | 4 +- .../walkers/concordance/IndelSubsets.java | 8 +- .../gatk/walkers/concordance/NWayVenn.java | 4 +- .../concordance/SNPGenotypeConcordance.java | 6 +- .../gatk/walkers/concordance/SimpleVenn.java | 8 +- .../filters/VariantFiltrationWalker.java | 3 +- .../DiploidGenotypeCalculationModel.java | 2 +- ...JointEstimateGenotypeCalculationModel.java | 2 +- .../walkers/genotyper/UnifiedGenotyper.java | 4 + .../sequenom/SequenomValidationConverter.java | 2 + .../oneoffprojects/refdata/HapmapVCFROD.java | 77 +- .../walkers/AlleleBalanceHistogramWalker.java | 11 +- .../walkers/IndelAnnotator.java | 13 +- .../walkers/IndelDBRateWalker.java | 5 +- .../walkers/VCFReferenceFixerWalker.java | 18 +- .../annotator/InsertSizeDistribution.java | 2 +- .../ProportionOfNonrefBasesSupportingSNP.java | 2 +- ...oportionOfRefSecondBasesSupportingSNP.java | 2 +- ...oportionOfSNPSecondBasesSupportingRef.java | 2 +- .../annotator/ThousandGenomesAnnotator.java | 2 +- .../multisample/LocusConcordanceInfo.java | 9 +- .../MultiSampleConcordanceWalker.java | 27 +- .../multisample/VCFConcordanceCalculator.java | 7 +- .../vcftools/BeagleTrioToVCFWalker.java | 3 +- .../vcftools/SimpleVCFIntersectWalker.java | 25 +- .../gatk/walkers/TrioGenotyperWalker.java | 2 +- .../walkers/annotator/GenomicAnnotation.java | 6 +- .../walkers/annotator/GenomicAnnotator.java | 8 +- .../gatk/walkers/diagnostics/SNPDensity.java | 7 +- .../AnnotationDataManager.java | 2 +- .../ApplyVariantClustersWalker.java | 8 +- .../VariantConcordanceROCCurveWalker.java | 6 +- .../gatk/walkers/vcftools/VCFCombine.java | 33 +- .../walkers/vcftools/VCFSelectWalker.java | 20 +- .../walkers/vcftools/VCFSubsetWalker.java | 14 +- .../playground/tools/vcf/VCFApplyCuts.java | 4 +- .../playground/tools/vcf/VCFCallRates.java | 9 +- .../playground/tools/vcf/VCFHomogenizer.java | 130 +--- .../sting/playground/tools/vcf/VCFMerge.java | 6 +- .../playground/tools/vcf/VCFOptimize.java | 7 +- .../tools/vcf/VCFSequenomAnalysis.java | 9 +- .../tools/vcf/VCFSequenomAnalysis2.java | 11 +- .../sting/playground/tools/vcf/VCFTool.java | 38 +- .../sting/utils/SampleUtils.java | 9 +- .../utils/genotype/GenotypeWriterFactory.java | 1 + .../sting/utils/genotype/glf/GLFWriter.java | 2 +- .../genotype/vcf/VCFFilterHeaderLine.java | 60 -- .../genotype/vcf/VCFFormatHeaderLine.java | 80 --- .../genotype/vcf/VCFGenotypeEncoding.java | 127 ---- .../utils/genotype/vcf/VCFGenotypeRecord.java | 353 ---------- .../utils/genotype/vcf/VCFGenotypeWriter.java | 4 +- .../vcf/VCFGenotypeWriterAdapter.java | 6 +- .../sting/utils/genotype/vcf/VCFHeader.java | 151 ---- .../utils/genotype/vcf/VCFHeaderLine.java | 86 --- .../utils/genotype/vcf/VCFInfoHeaderLine.java | 75 -- .../utils/genotype/vcf/VCFParameters.java | 3 + .../sting/utils/genotype/vcf/VCFReader.java | 304 +------- .../sting/utils/genotype/vcf/VCFRecord.java | 658 ------------------ .../sting/utils/genotype/vcf/VCFUtils.java | 17 +- .../sting/utils/genotype/vcf/VCFWriter.java | 6 +- .../sting/gatk/refdata/RodVCFUnitTest.java | 181 ----- .../RodSystemValidationIntegrationTest.java | 6 +- .../vcf/VCFGenotypeEncodingUnitTest.java | 151 ---- .../utils/genotype/vcf/VCFHeaderUnitTest.java | 52 -- .../genotype/vcf/VCFIntegrationTest.java | 18 - .../utils/genotype/vcf/VCFReaderUnitTest.java | 366 ---------- .../utils/genotype/vcf/VCFRecordUnitTest.java | 169 ----- .../utils/genotype/vcf/VCFWriterUnitTest.java | 1 + 98 files changed, 356 insertions(+), 3580 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java delete mode 100755 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFilterHeaderLine.java delete mode 100755 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFormatHeaderLine.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java delete mode 100755 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderLine.java delete mode 100755 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFInfoHeaderLine.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java delete mode 100755 java/test/org/broadinstitute/sting/gatk/refdata/RodVCFUnitTest.java delete mode 100644 java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncodingUnitTest.java delete mode 100644 java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java delete mode 100755 java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFIntegrationTest.java delete mode 100644 java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderUnitTest.java delete mode 100755 java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordUnitTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java index 52c58d99f..a0351a81e 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -29,10 +29,10 @@ import java.io.*; import java.util.Set; import java.util.HashSet; +import org.broad.tribble.vcf.VCFHeaderLine; import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.genotype.*; -import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java index 27150e5de..d142a5ffb 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.io.storage; -import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; +import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; import java.io.File; import java.util.Set; diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java index fa03cc340..a8f69e9ed 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.io.stubs; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import java.io.File; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java deleted file mode 100755 index b0c5fbc01..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ /dev/null @@ -1,351 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.vcf.*; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.*; - - -/** - * @author aaron - *

- * Class RodVCF - *

- * An implementation of the ROD for VCF. - */ -public class RodVCF extends BasicReferenceOrderedDatum implements Iterator { - public VCFReader getReader() { - return mReader; - } - - // our VCF related information - private VCFReader mReader; - - public VCFRecord getRecord() { - return mCurrentRecord; - } - - public VCFRecord mCurrentRecord; - - public RodVCF(String name) { - super(name); - } - - public RodVCF(String name, VCFRecord currentRecord, VCFReader reader) { - super(name); - mCurrentRecord = currentRecord; - mReader = reader; - } - - public void assertNotNull() { - if ( mCurrentRecord == null ) { - throw new UnsupportedOperationException("The current Record is null"); - } - } - - @Override - public boolean parseLine(Object header, String[] parts) throws IOException { - throw new UnsupportedOperationException("RodVCF does not support the parseLine method"); - } - - public Object initialize(final File source) throws FileNotFoundException { - if ( mReader == null ) { - mReader = new VCFReader(source); - } - return mReader.getHeader(); - } - - @Override - public String toString() { - return (mCurrentRecord != null ? mCurrentRecord.toStringEncoding(mReader.getHeader()) : ""); - } - - public static RodVCF createIterator(String name, File file) { - RodVCF vcf = new RodVCF(name); - try { - vcf.initialize(file); - } catch (FileNotFoundException e) { - throw new StingException("Unable to find file " + file); - } - return vcf; - } - - public boolean hasNonRefAlleleFrequency() { - assertNotNull(); - return mCurrentRecord.getNonRefAlleleFrequency() > 0.0; - } - - /** - * get the frequency of this variant - * - * @return VariantFrequency with the stored frequency - */ - public double getNonRefAlleleFrequency() { - assertNotNull(); - return mCurrentRecord.getNonRefAlleleFrequency(); - } - - public String getID() { - assertNotNull(); - return mCurrentRecord.getID(); - } - - /** - * are we a SNP? If not we're a Indel/deletion - * - * @return true if we're a SNP - */ - public boolean isSNP() { - assertNotNull(); - return mCurrentRecord.isSNP(); - } - - /** - * are we a novel site? Is there a DBSNP identifier - * or a hapmap entry for the site? - * - * @return true if this site is novel - */ - public boolean isNovel() { - assertNotNull(); - return mCurrentRecord.isNovel(); - } - - /** - * are we an insertion? - * - * @return true if we are, false otherwise - */ - public boolean isInsertion() { - assertNotNull(); - return mCurrentRecord.isInsertion(); - } - - /** - * are we an insertion? - * - * @return true if we are, false otherwise - */ - public boolean isDeletion() { - assertNotNull(); - return mCurrentRecord.isDeletion(); - } - - @Override - public GenomeLoc getLocation() { - assertNotNull(); - return mCurrentRecord.getLocation(); - } - - /** - * get the reference base(s) at this position - * - * @return the reference base or bases, as a string - */ - public String getReference() { - assertNotNull(); - return mCurrentRecord.getReference(); - } - - /** are we bi-allelic? - * @return true if we're bi-allelic - */ - public boolean isBiallelic() { - assertNotNull(); - return mCurrentRecord.isBiallelic(); - } - - /** - * get the -1 * (log 10 of the error value) - * - * @return the log based error estimate - */ - public double getNegLog10PError() { - assertNotNull(); - return mCurrentRecord.getNegLog10PError(); - } - - public double getQual() { - assertNotNull(); - return mCurrentRecord.getQual(); - } - - public boolean hasAlternateAllele() { - assertNotNull(); - return mCurrentRecord.hasAlternateAllele(); - } - - /** - * gets the alternate alleles. This method should return all the alleles present at the location, - * NOT including the reference base. This is returned as a string list with no guarantee ordering - * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest - * frequency). - * - * @return an alternate allele list - */ - public List getAlternateAlleleList() { - assertNotNull(); - return mCurrentRecord.getAlternateAlleleList(); - } - - /** - * gets the alleles. This method should return all the alleles present at the location, - * including the reference base. The first allele should always be the reference allele, followed - * by an unordered list of alternate alleles. - * - * @return an alternate allele list - */ - public List getAlleleList() { - assertNotNull(); - return mCurrentRecord.getAlleleList(); - } - - /** - * are we truely a variant, given a reference - * - * @return false if we're a variant(indel, delete, SNP, etc), true if we're not - */ - public boolean isReference() { - assertNotNull(); - return mCurrentRecord.isReference(); - } - - /** - * are we an insertion or a deletion? yes, then return true. No? Well, false then. - * - * @return true if we're an insertion or deletion - */ - public boolean isIndel() { - assertNotNull(); - return mCurrentRecord.isIndel(); - } - - /** - * gets the alternate base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * of - * - * @return a char, representing the alternate base - */ - public char getAlternativeBaseForSNP() { - assertNotNull(); - return mCurrentRecord.getAlternativeBaseForSNP(); - } - - /** - * gets the reference base is the case of a SNP. Throws an IllegalStateException if we're not a SNP - * - * @return a char, representing the alternate base - */ - public char getReferenceForSNP() { - assertNotNull(); - return mCurrentRecord.getReferenceForSNP(); - } - - public boolean hasGenotypeData() { - assertNotNull(); - return mCurrentRecord.hasGenotypeData(); - } - - /** - * get the genotypes - * - * @return a list of the genotypes - */ - public List getVCFGenotypeRecords() { - assertNotNull(); - return mCurrentRecord.getVCFGenotypeRecords(); - } - - /** - * Returns the genotype associated with sample, or null if the genotype is missing - * - * @param sampleName the name of the sample genotype to fetch - * @return genotype record - */ - public VCFGenotypeRecord getGenotype(final String sampleName) { - return mCurrentRecord.getGenotype(sampleName); - } - - public String[] getSampleNames() { - assertNotNull(); - return mCurrentRecord.getSampleNames(); - } - -// public Map getSampleGenotypes() { -// String[] samples = getSampleNames(); -// List genotypes = getGenotypes(); -// HashMap map = new HashMap(); -// -// for ( int i = 0; i < samples.length; i++ ) { -// map.put(samples[i], genotypes.get(i)); -// } -// -// return map; -// } - - public Map getInfoValues() { - assertNotNull(); - return mCurrentRecord.getInfoValues(); - } - - public String[] getFilteringCodes() { - assertNotNull(); - return mCurrentRecord.getFilteringCodes(); - } - - public boolean isFiltered() { - assertNotNull(); - return mCurrentRecord.isFiltered(); - } - -// public boolean hasFilteringCodes() { -// assertNotNull(); -// return mCurrentRecord.hasFilteringCodes(); -// } - - public String getFilterString() { - assertNotNull(); - return mCurrentRecord.getFilterString(); - } - - public VCFHeader getHeader() { - return mReader.getHeader(); - } - - public boolean hasNext() { - return mReader.hasNext(); - } - - /** - * @return the next element in the iteration. - * @throws NoSuchElementException - iterator has no more elements. - */ - public RodVCF next() { - if (!this.hasNext()) throw new NoSuchElementException("RodVCF next called on iterator with no more elements"); - - // get the next record - VCFRecord rec = mReader.next(); - - // make sure the next VCF record isn't before the current record (we'll accept at the same location, the - // spec doesn't indicate, and it seems like a valid use case) - GenomeLoc curPosition = null; - if (mCurrentRecord != null) curPosition = mCurrentRecord.getLocation(); - if (curPosition != null && rec != null && curPosition.compareTo(rec.getLocation()) > 0) - throw new StingException("The next VCF record appears to be before the current (current location => " + curPosition.toString() + - ", the next record position => " + rec.getLocation().toString() + " with line : " + rec.toStringEncoding(mReader.getHeader()) + "). " + - "Check to make sure the input VCF file is correctly sorted."); - - // save off the previous record. This is needed given how iterators are used in the ROD system; - // we need to save off the last record - mCurrentRecord = rec; - return new RodVCF(name, rec, mReader); - } - - public void remove() { - throw new UnsupportedOperationException("The remove operation is not supported for a VCF rod"); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 4556c7907..11adf5a0b 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -4,6 +4,7 @@ import edu.mit.broad.picard.genotype.DiploidGenotype; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; +import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; @@ -16,7 +17,6 @@ import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall; import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; -import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.*; @@ -46,7 +46,6 @@ public class VariantContextAdaptors { static { adaptors.put(DbSNPFeature.class, new DBSnpAdaptor()); - adaptors.put(RodVCF.class, new RodVCFAdaptor()); adaptors.put(VCFRecord.class, new VCFRecordAdaptor()); adaptors.put(PlinkRod.class, new PlinkRodAdaptor()); adaptors.put(HapMapROD.class, new HapMapAdaptor()); @@ -132,23 +131,6 @@ public class VariantContextAdaptors { } } - // -------------------------------------------------------------------------------------------------------------- - // - // VCF to VariantContext and back again - // - // -------------------------------------------------------------------------------------------------------------- - - private static class RodVCFAdaptor extends VCAdaptor { - // WARNING: do not use this method if you have anything other than point mutations in your VCF - VariantContext convert(String name, Object input) { - return vcfToVariantContext(name, ((RodVCF)input).getRecord(), null); - } - - VariantContext convert(String name, Object input, ReferenceContext ref) { - return vcfToVariantContext(name, ((RodVCF)input).getRecord(), ref); - } - } - private static class VCFRecordAdaptor extends VCAdaptor { // WARNING: do not use this method if you have anything other than point mutations in your VCF VariantContext convert(String name, Object input) { @@ -229,7 +211,7 @@ public class VariantContextAdaptors { double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError(); - GenomeLoc loc = vcf.getLocation(); + GenomeLoc loc = GenomeLocParser.createGenomeLoc(vcf.getChr(),vcf.getStart()); if ( vcf.isDeletion() ) loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java index 206caea03..431d49ce7 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java @@ -66,7 +66,6 @@ public class RODTrackBuilder implements RMDTrackBuilder { Types.put("HapMap", HapMapROD.class); Types.put("Intervals", IntervalRod.class); Types.put("GLF", RodGLF.class); - Types.put("VCF", RodVCF.class); Types.put("PicardDbSNP", rodPicardDbSNP.class); Types.put("HapmapVCF", HapmapVCFROD.class); Types.put("Beagle", BeagleROD.class); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java index 8e7381ecf..7c9959284 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java @@ -67,8 +67,7 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen public Map getAvailableTrackNamesAndTypes() { Map classes = new HashMap(); for (String c : this.pluginsByName.keySet()) - // we're excluding VCF right now - if (!c.contains("VCF")) classes.put(c,this.pluginsByName.get(c)); + classes.put(c,this.pluginsByName.get(c)); return classes; } @@ -88,15 +87,13 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen // make a feature reader FeatureReader reader; try { - FeatureCodec codec = this.createByType(targetClass); - // check to see if the input file has an index if (!(new File(inputFile.getAbsolutePath() + linearIndexExtension).canRead())) { - LinearIndex index = createIndex(inputFile, codec); - reader = new FeatureReader(inputFile,index, codec); + LinearIndex index = createIndex(inputFile, this.createByType(targetClass)); + reader = new FeatureReader(inputFile,index, this.createByType(targetClass)); } else { - reader = new FeatureReader(inputFile,codec); + reader = new FeatureReader(inputFile,this.createByType(targetClass)); } } catch (FileNotFoundException e) { throw new StingException("Unable to create reader with file " + inputFile, e); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java index cbeb9a297..add373434 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java @@ -26,6 +26,10 @@ package org.broadinstitute.sting.gatk.walkers; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -95,8 +99,8 @@ public class VariantsToVCF extends RodWalker { throw new IllegalStateException("VCF record was created, but no rod data is present"); Object rod = rods.get(0); - if ( rod instanceof RodVCF ) - samples.addAll(Arrays.asList(((RodVCF)rod).getSampleNames())); + if ( rod instanceof VCFRecord ) + samples.addAll(Arrays.asList(((VCFRecord)rod).getSampleNames())); else if ( rod instanceof HapMapROD ) samples.addAll(Arrays.asList(((HapMapROD)rod).getSampleIDs())); else diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Alignability.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Alignability.java index 8ce4c425d..f4f481c9b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Alignability.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Alignability.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.TabularROD; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.HashMap; import java.util.Map; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 85ae80959..48518cfd6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index 87857ca76..7d35e207c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.List; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index 243c49fff..39e072e4d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 55939176c..6c9519554 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.genotype.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index 2a78d26e0..8430f68e5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 5fcd979cd..3ad9b12ca 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; @@ -36,7 +37,7 @@ import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; + import java.util.*; import net.sf.samtools.SAMRecord; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 90f712bff..edd579490 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation; import org.broadinstitute.sting.utils.QualityUtils; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 234139a17..3d1a75cc8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -7,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index d1a14e1c0..1100db5c8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -7,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 69c0e3273..07dc64102 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.List; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index ebeca20a8..dee2dd145 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -8,7 +9,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index 1645ed20a..1faed4335 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,15 +1,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; -import java.util.ArrayList; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualityAdjustedSecondBaseLod.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualityAdjustedSecondBaseLod.java index 481945b08..31c738541 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualityAdjustedSecondBaseLod.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualityAdjustedSecondBaseLod.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 14e752de9..200d035be 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -9,8 +11,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnota import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; import java.util.ArrayList; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java index 5e0017e73..f1b0935b9 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java @@ -25,11 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index a251de39b..f4371184e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -7,7 +8,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index a15dc0001..d447b48ec 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -25,12 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; @@ -41,8 +43,6 @@ import org.broadinstitute.sting.utils.classloader.PackageUtils; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; @@ -184,7 +184,7 @@ public class VariantAnnotator extends LocusWalker { } } - if ( variant instanceof RodVCF ) { + if ( variant instanceof VCFRecord) { for(VariantContext annotatedVC : annotatedVCs ) { vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase())); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 1b7cbc5bb..56f842533 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -26,6 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; @@ -41,9 +44,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.playground.gatk.walkers.annotator.GenomicAnnotation; import org.broadinstitute.sting.utils.classloader.PackageUtils; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index 7efb0f62c..9eafd8eaf 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broad.tribble.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.utils.genotype.vcf.VCFFormatHeaderLine; import java.util.Map; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index dff773954..23e079358 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.Map; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java index 6546dd26c..bd9210f22 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java @@ -25,10 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.concordance; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.classloader.PackageUtils; @@ -157,12 +160,14 @@ public class CallsetConcordanceWalker extends RodWalker { return 0; // get all of the vcf rods at this locus - ArrayList vcfRods = new ArrayList(); + Map vcfRods = new LinkedHashMap(); Iterator rods = rodData.getAllRods().iterator(); while (rods.hasNext()) { GATKFeature rod = rods.next(); - if ( rod.getUnderlyingObject() instanceof RodVCF ) - vcfRods.add((RodVCF)rod.getUnderlyingObject()); + if ( rod.getUnderlyingObject() instanceof VCFRecord ) { + if (vcfRods.containsKey(rod)) throw new StingException("Duplicate VCF's found"); + vcfRods.put((VCFRecord)rod.getUnderlyingObject(),rod.getName()); + } } if ( vcfRods.size() == 0 ) @@ -171,12 +176,12 @@ public class CallsetConcordanceWalker extends RodWalker { // pull out all of the individual calls from the rods and insert into a map based on the // mapping from rod/sample to uniquified name HashMap samplesToRecords = new HashMap(); - for ( RodVCF rod : vcfRods ) { + for ( VCFRecord rod : vcfRods.keySet() ) { List records = rod.getVCFGenotypeRecords(); for ( VCFGenotypeRecord vcfRec : records ) { - String uniquifiedSample = rodNamesToSampleNames.get(new Pair(rod.getName(), vcfRec.getSampleName())); + String uniquifiedSample = rodNamesToSampleNames.get(new Pair(vcfRods.get(rod), vcfRec.getSampleName())); if ( uniquifiedSample == null ) - throw new StingException("Unexpected sample encountered: " + vcfRec.getSampleName() + " in rod " + rod.getName()); + throw new StingException("Unexpected sample encountered: " + vcfRec.getSampleName() + " in rod " + vcfRods.get(rod)); samplesToRecords.put(uniquifiedSample, vcfRec); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java index a1c9ff703..124da2d46 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.concordance; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import java.util.Map; import java.util.Set; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java index 5c5623926..7e9826f51 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.concordance; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import java.util.*; @@ -103,5 +103,5 @@ public class IndelSubsets implements ConcordanceType { } public String getInfoName() { return "IndelSubsets"; } - public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "Indel-related subsets"); } + public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "Indel-related subsets"); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java index 6a6740612..5cb852300 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.concordance; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import java.util.*; import java.util.Map.Entry; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java index 521b618f6..62a53139a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.concordance; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import java.util.*; @@ -116,5 +116,5 @@ public class SNPGenotypeConcordance implements ConcordanceType { } public String getInfoName() { return "SnpConcordance"; } - public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "SNP concordance test"); } + public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "SNP concordance test"); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java index e028becae..1e7c9f6eb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.concordance; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.utils.StingException; import java.util.*; @@ -64,5 +64,5 @@ public class SimpleVenn implements ConcordanceType { } public String getInfoName() { return "Venn"; } - public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "2-way Venn split"); } + public VCFInfoHeaderLine getInfoDescription() { return new VCFInfoHeaderLine(getInfoName(), 1, VCFInfoHeaderLine.INFO_TYPE.String, "2-way Venn split"); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index cf6a3e182..9617d6295 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.filters; -import org.apache.commons.jexl.Expression; +import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; @@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.genotype.vcf.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 55813a2fa..37fecd640 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -25,11 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.CalledGenotype; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index 8c1771d8a..b1ec3a81a 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.*; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 88ab0d390..8643a68f0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,6 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java index 54b17e4ea..6959758fc 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.sequenom; +import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; @@ -38,6 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.genotype.vcf.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/refdata/HapmapVCFROD.java b/java/src/org/broadinstitute/sting/oneoffprojects/refdata/HapmapVCFROD.java index 030b3375a..d68c00265 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/refdata/HapmapVCFROD.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/refdata/HapmapVCFROD.java @@ -1,13 +1,13 @@ package org.broadinstitute.sting.oneoffprojects.refdata; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.refdata.BasicReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import java.io.File; import java.io.FileNotFoundException; @@ -23,14 +23,15 @@ public class HapmapVCFROD extends BasicReferenceOrderedDatum implements Iterator // This is a (hopefully temporary) wrapper class for certain VCF files that we want to protect from // utilities that grab genotypes or sample names across all VCF files - private RodVCF rod; + private VCFRecord record; + private VCFReader reader; public VCFReader getReader() { - return rod.getReader(); + return reader; } public VCFRecord getRecord() { - return rod.getRecord(); + return record; } public HapmapVCFROD(String name) { @@ -39,85 +40,77 @@ public class HapmapVCFROD extends BasicReferenceOrderedDatum implements Iterator public HapmapVCFROD(String name, VCFRecord currentRecord, VCFReader reader) { super(name); - rod = new RodVCF(name,currentRecord,reader); - } - - public HapmapVCFROD(String name, RodVCF rod) { - super(name); - this.rod = rod; + this.record = currentRecord; + this.reader = reader; } public Object initialize(final File source) throws FileNotFoundException { - rod = new RodVCF(name); - rod.initialize(source); - return rod.getHeader(); + reader = new VCFReader(source); + if (reader.hasNext()) record = reader.next(); + return reader.getHeader(); } public boolean parseLine(Object obj, String[] args) { - try { - return rod.parseLine(obj,args); - } catch (Exception e) { - throw new UnsupportedOperationException("Parse line not supported",e); - } + return false; } public double getNegLog10PError() { - return rod.getNegLog10PError(); + return record.getNegLog10PError(); } public String getReference() { - return rod.getReference(); + return record.getReference(); } public String toString() { - return rod.toString(); + return record.toString(); } public List getAlternateAlleleList() { - return rod.getAlternateAlleleList(); + return record.getAlternateAlleleList(); } public boolean isDeletion() { - return rod.isDeletion(); + return record.isDeletion(); } public GenomeLoc getLocation() { - return rod.getLocation(); + return GenomeLocParser.createGenomeLoc(record.getChr(),record.getStart()); } public boolean isBiallelic() { - return rod.isBiallelic(); + return record.isBiallelic(); } public boolean isIndel() { - return rod.isIndel(); + return record.isIndel(); } public boolean isSNP() { - return rod.isSNP(); + return record.isSNP(); } public boolean isReference() { - return rod.isReference(); + return record.isReference(); } public double getNonRefAlleleFrequency() { - return rod.getNonRefAlleleFrequency(); + return record.getNonRefAlleleFrequency(); } public char getAlternativeBaseForSNP() { - return rod.getAlternativeBaseForSNP(); + return record.getAlternativeBaseForSNP(); } public boolean isInsertion() { - return rod.isInsertion(); + return record.isInsertion(); } public List getAlleleList() { - return rod.getAlleleList(); + return record.getAlleleList(); } public char getReferenceForSNP() { - return rod.getReferenceForSNP(); + return record.getReferenceForSNP(); } public boolean hasGenotype(DiploidGenotype g) { @@ -126,29 +119,29 @@ public class HapmapVCFROD extends BasicReferenceOrderedDatum implements Iterator } public VCFHeader getHeader() { - return rod.getHeader(); + return record.getHeader(); } public boolean hasNext() { - return rod.hasNext(); + return reader.hasNext(); } public HapmapVCFROD next() { - return new HapmapVCFROD(name,rod.next()); + return new HapmapVCFROD(name, record, reader); } public void remove() { - rod.remove(); + throw new UnsupportedOperationException("Unable to remove"); } public static HapmapVCFROD createIterator(String name, File file) { - RodVCF vcf = new RodVCF(name); + HapmapVCFROD vcf = new HapmapVCFROD(name); try { vcf.initialize(file); } catch (FileNotFoundException e) { - throw new StingException("Unable to find file " + file); + throw new StingException("Unable to load file",e); } - return new HapmapVCFROD(name,vcf); + return vcf; } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java index 932219db6..0448da9f8 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java @@ -1,16 +1,16 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.vcf.VCFCodec; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import org.broadinstitute.sting.utils.pileup.PileupElement; import java.util.*; @@ -22,7 +22,7 @@ import java.util.*; * Time: 3:25:11 PM * To change this template use File | Settings | File Templates. */ -@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="variants",type= RodVCF.class)}) +@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="variants",type= VCFCodec.class)}) public class AlleleBalanceHistogramWalker extends LocusWalker, Map>> { @@ -47,12 +47,11 @@ public class AlleleBalanceHistogramWalker extends LocusWalker } public Map map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - RodVCF vcfRod = tracker.lookup("variants",RodVCF.class); + VCFRecord record = tracker.lookup("variants", VCFRecord.class); - if ( vcfRod == null ) { + if ( record == null ) { return null; } - VCFRecord record = vcfRod.getRecord(); return getAlleleBalanceBySample(record,ref,context); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java index 3ffa8fd11..7d0817c19 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java @@ -1,15 +1,16 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.vcf.*; @@ -93,12 +94,12 @@ public class IndelAnnotator extends RodWalker{ Object variant = rods.get(0); - if ( variant instanceof RodVCF ) { + if ( variant instanceof VCFRecord) { RODRecordList annotationList = (refseqIterator == null ? null : refseqIterator.seekForward(ref.getLocus())); String annotationString = (refseqIterator == null ? "" : getAnnotationString(annotationList)); annotationString = annotationString.split("\\s+")[0]; - ((RodVCF) variant).getRecord().addInfoField("type",annotationString); - vcfWriter.addRecord(((RodVCF) variant).getRecord()); + ((VCFRecord) variant).addInfoField("type",annotationString); + vcfWriter.addRecord((VCFRecord) variant); } else { throw new StingException("This one-off walker only deals with VCF files."); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java index e6301e3b7..5f143d502 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java @@ -1,11 +1,12 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; @@ -13,8 +14,6 @@ import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCFReferenceFixerWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCFReferenceFixerWalker.java index 1451ab1b4..1e2edef15 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCFReferenceFixerWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCFReferenceFixerWalker.java @@ -1,12 +1,11 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; -import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -42,14 +41,13 @@ public class VCFReferenceFixerWalker extends RodWalker { } List rods = tracker.getReferenceMetaData("fixme"); Object rod = rods.get(0); - RodVCF vcfrod = null; - if ( rod instanceof RodVCF ) { - vcfrod = (RodVCF) rod; + VCFRecord vcfrod = null; + if ( rod instanceof VCFRecord ) { + vcfrod = (VCFRecord) rod; } - VCFRecord rec = vcfrod.getRecord(); - rec.setReferenceBase(new String(BaseUtils.charSeq2byteSeq(context.getBases()))); - return rec; + if (vcfrod != null) vcfrod.setReferenceBase(new String(BaseUtils.charSeq2byteSeq(context.getBases()))); + return vcfrod; /* VariantContext vcon = null; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/InsertSizeDistribution.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/InsertSizeDistribution.java index a0bd1fa09..f66195d3d 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/InsertSizeDistribution.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/InsertSizeDistribution.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java index 0f17db4a0..1f0d5e171 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java index 518650632..45e9dc0f4 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java @@ -25,11 +25,11 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java index 6fb0635e7..4c43bc09f 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java @@ -25,12 +25,12 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java index f1c8dabf1..d42ff1364 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ThousandGenomesAnnotator.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; import java.util.HashMap; import java.util.Map; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/LocusConcordanceInfo.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/LocusConcordanceInfo.java index fb28ecf89..7a9d2cd25 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/LocusConcordanceInfo.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/LocusConcordanceInfo.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; +import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.Arrays; import java.util.HashSet; @@ -76,9 +77,9 @@ class LocusConcordanceInfo { public GenomeLoc getLoc() { if ( concordanceType == ConcordanceType.TRUTH_SET || concordanceType == ConcordanceType.BOTH_SETS || concordanceType == ConcordanceType.TRUTH_SET_VARIANT_FILTERED) { - return truthVCFRecord.getLocation(); + return GenomeLocParser.createGenomeLoc(truthVCFRecord.getChr(),truthVCFRecord.getStart()); } else { - return variantVCFRecord.getLocation(); + return GenomeLocParser.createGenomeLoc( variantVCFRecord.getChr(),variantVCFRecord.getStart()); } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java index 2e6276ee7..0aa3199ff 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/MultiSampleConcordanceWalker.java @@ -25,10 +25,11 @@ package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample; +import org.broad.tribble.vcf.VCFCodec; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; @@ -41,7 +42,7 @@ import org.broadinstitute.sting.commandline.Argument; * a VCF binding with the name 'variants'. * @Author: Chris Hartl */ -@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= RodVCF.class),@RMD(name="variants",type= RodVCF.class)}) +@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="truth",type= VCFCodec.class),@RMD(name="variants",type= VCFRecord.class)}) public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInfo, MultiSampleConcordanceSet > { @Argument(fullName="noLowDepthLoci", shortName="NLD", doc="Do not use loci in analysis where the variant depth (as specified in the VCF) is less than the given number; "+ "DO NOT USE THIS IF YOUR VCF DOES NOT HAVE 'DP' IN THE FORMAT FIELD", required=false) private int minDepth = -1; @@ -65,14 +66,14 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf if ( tracker == null ) { return null; } - RodVCF variantData = tracker.lookup("variants",RodVCF.class); + VCFRecord variantData = tracker.lookup("variants", VCFRecord.class); if ( ignoreKnownSites ) { // ignoreKnownSites && tracker.lookup("variants",null) != null && ! ( (RodVCF) tracker.lookup("variants",null)).isNovel() ) ) if ( variantData != null && ! variantData.isNovel() ) { //logger.info("Not novel: "+( (RodVCF) tracker.lookup("variants",null)).getID()); return null; } } - RodVCF truthData = tracker.lookup("truth",RodVCF.class); + VCFRecord truthData = tracker.lookup("truth",VCFRecord.class); LocusConcordanceInfo concordance; if ( truthData == null && variantData == null) { @@ -82,33 +83,33 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf } else if ( truthData == null ) { // not in the truth set - if ( ( (RodVCF) variantData ).isFiltered() ) { + if ( variantData.isFiltered() ) { concordance = null; } else { - concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref); + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null,variantData,ref); } } else if ( variantData == null ) { // not in the variant set - if ( ( (RodVCF) truthData).isFiltered() ) { + if ( (truthData).isFiltered() ) { concordance = null; } else { - concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,( (RodVCF) truthData).getRecord(),null,ref); + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET,truthData,null,ref); } } else { // in both // check for filtering - boolean truth_filter = ((RodVCF) truthData).isFiltered(); - boolean call_filter = ((RodVCF) variantData).isFiltered(); + boolean truth_filter = truthData.isFiltered(); + boolean call_filter = variantData.isFiltered(); if ( truth_filter && call_filter ) { @@ -116,15 +117,15 @@ public class MultiSampleConcordanceWalker extends RodWalker< LocusConcordanceInf } else if ( truth_filter ) { - concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null, ( (RodVCF) variantData ).getRecord(),ref); + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.VARIANT_SET,null,variantData,ref); } else if ( call_filter ) { - concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,( (RodVCF) truthData).getRecord(), null ,ref); + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.TRUTH_SET_VARIANT_FILTERED,truthData, null ,ref); } else { - concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,( (RodVCF) truthData).getRecord(),( (RodVCF) variantData).getRecord(),ref); + concordance = new LocusConcordanceInfo(LocusConcordanceInfo.ConcordanceType.BOTH_SETS,truthData,variantData,ref); } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java index e3f1e6d69..a9ec9f487 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/multisample/VCFConcordanceCalculator.java @@ -1,12 +1,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.varianteval.multisample; +import org.broad.tribble.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; /** * Created by IntelliJ IDEA. diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/BeagleTrioToVCFWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/BeagleTrioToVCFWalker.java index 7fb0a4c0d..5725c6533 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/BeagleTrioToVCFWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/BeagleTrioToVCFWalker.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.vcftools; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; @@ -77,7 +78,7 @@ public class BeagleTrioToVCFWalker extends RodWalker { if ( vc != null ) { if ( ! headerWritten ) { - RodVCF vcfrod = tracker.lookup(TRACK_NAME,RodVCF.class); + VCFRecord vcfrod = tracker.lookup(TRACK_NAME,VCFRecord.class); writer.writeHeader(vcfrod.getHeader()); headerWritten = true; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/SimpleVCFIntersectWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/SimpleVCFIntersectWalker.java index 9a777e413..a389b04b5 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/SimpleVCFIntersectWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/vcftools/SimpleVCFIntersectWalker.java @@ -25,10 +25,13 @@ package org.broadinstitute.sting.oneoffprojects.walkers.vcftools; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.genotype.vcf.*; @@ -70,8 +73,8 @@ public class SimpleVCFIntersectWalker extends RodWalker{ return null; } - RodVCF priorityCall = tracker.lookup("priority",RodVCF.class); - RodVCF otherCall = tracker.lookup("other",RodVCF.class); + VCFRecord priorityCall = tracker.lookup("priority",VCFRecord.class); + VCFRecord otherCall = tracker.lookup("other",VCFRecord.class); if ( priorityCall == null && otherCall == null ) { return null; @@ -82,9 +85,9 @@ public class SimpleVCFIntersectWalker extends RodWalker{ if ( ! union ) { return null; } else if ( merger.hasBeenInstantiated() ) { - merger.update(null,otherCall.getRecord()); + merger.update(null,otherCall); } else { - merger.hold(otherCall.getRecord()); + merger.hold(otherCall); } } else if ( otherCall == null ) { @@ -92,17 +95,17 @@ public class SimpleVCFIntersectWalker extends RodWalker{ if ( ! union ) { return null; } else if ( merger.hasBeenInstantiated() ) { - merger.update(priorityCall.getRecord(),null); + merger.update(priorityCall,null); } else { - merger.hold(priorityCall.getRecord()); + merger.hold(priorityCall); } } else { if ( merger.hasBeenInstantiated() ) { - merger.update(priorityCall.getRecord(), otherCall.getRecord()); + merger.update(priorityCall, otherCall); } else { - merger = instantiateMerger(priorityCall.getRecord(),otherCall.getRecord()); + merger = instantiateMerger(priorityCall,otherCall); } } @@ -255,8 +258,8 @@ class VCFRecordMerger { private HashMap getFields(VCFRecord record1, VCFRecord record2) { HashMap fields = new HashMap(); - fields.put(VCFHeader.HEADER_FIELDS.CHROM, record1.getLocation().getContig()); - fields.put(VCFHeader.HEADER_FIELDS.POS, String.format("%d",record1.getLocation().getStart())); + fields.put(VCFHeader.HEADER_FIELDS.CHROM, record1.getChr()); + fields.put(VCFHeader.HEADER_FIELDS.POS, String.format("%d",record1.getStart())); fields.put(VCFHeader.HEADER_FIELDS.ID, record1.getID()); fields.put(VCFHeader.HEADER_FIELDS.ALT, listAsString(record1.getAlternateAlleleList())); fields.put(VCFHeader.HEADER_FIELDS.REF, record1.getReference()); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java index 15dd0a02d..b41f7412b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.playground.gatk.walkers; +import org.broad.tribble.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; @@ -42,7 +43,6 @@ import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java index b54ca4eda..555f2ce46 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java @@ -8,6 +8,7 @@ import java.util.Map; import java.util.Set; import java.util.Map.Entry; +import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; @@ -21,7 +22,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; /** * This plugin for {@link VariantAnnotatorEngine} serves as the core @@ -73,8 +73,8 @@ public class GenomicAnnotation implements InfoFieldAnnotation { final Map annotations = new HashMap(); for(final GATKFeature gatkFeature : tracker.getAllRods()) { - final ReferenceOrderedDatum rod = (ReferenceOrderedDatum) gatkFeature.getUnderlyingObject(); - final String name = rod.getName(); + final Object rod = gatkFeature.getUnderlyingObject(); + final String name = gatkFeature.getName(); if( name.equals("variant") || name.equals("interval") ) { continue; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java index 45a0d9fc6..31f60d2ae 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java @@ -35,13 +35,15 @@ import java.util.Map; import java.util.Set; import java.util.TreeSet; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; @@ -50,8 +52,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; @@ -183,7 +183,7 @@ public class GenomicAnnotator extends RodWalker { } } - if ( variant instanceof RodVCF ) { //TODO is this if-statement necessary? + if ( variant instanceof VCFRecord) { //TODO is this if-statement necessary? for(VariantContext annotatedVC : annotatedVCs ) { vcfWriter.addRecord(VariantContextAdaptors.toVCF(annotatedVC, ref.getBase())); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java index 4572db0e5..43585a82d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/SNPDensity.java @@ -25,11 +25,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.diagnostics; +import org.broad.tribble.vcf.VCFCodec; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; @@ -40,7 +41,7 @@ import org.broadinstitute.sting.commandline.Argument; * Computes the density of SNPs passing and failing filters in intervals on the genome and emits a table for display */ @By(DataSource.REFERENCE) -@Requires(value={},referenceMetaData=@RMD(name="eval",type=RodVCF.class)) +@Requires(value={},referenceMetaData=@RMD(name="eval",type= VCFCodec.class)) public class SNPDensity extends RefWalker, SNPDensity.Counter> { @Argument(fullName="granularity", shortName="granularity", doc="", required=false) private int granularity = 1000000; @@ -64,7 +65,7 @@ public class SNPDensity extends RefWalker, SNPDe public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { VariantContext vc = null; - RodVCF vcf = tracker.lookup("eval",RodVCF.class); + VCFRecord vcf = tracker.lookup("eval",VCFRecord.class); if (vcf != null) vc = VariantContextAdaptors.toVariantContext("eval", vcf); return new Pair(vc, context.getLocation()); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index 43e6058bc..1102e1290 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; import java.util.*; import java.io.IOException; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/ApplyVariantClustersWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/ApplyVariantClustersWalker.java index 37e05ff9a..399eaecb5 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/ApplyVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/ApplyVariantClustersWalker.java @@ -25,19 +25,21 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; +import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.utils.genotype.vcf.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; +import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import java.io.File; import java.io.IOException; @@ -128,7 +130,7 @@ public class ApplyVariantClustersWalker extends RodWalker dataSources = this.getToolkit().getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getType().equals(RodVCF.class) ) { + if ( rod.getType().equals(VCFCodec.class) ) { VCFReader reader = new VCFReader(rod.getFile()); Set vcfSamples = reader.getHeader().getGenotypeSamples(); samples.addAll(vcfSamples); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantConcordanceROCCurveWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantConcordanceROCCurveWalker.java index 73e5edefa..27ae29674 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantConcordanceROCCurveWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/VariantConcordanceROCCurveWalker.java @@ -25,12 +25,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; @@ -83,11 +83,11 @@ public class VariantConcordanceROCCurveWalker extends RodWalker 1 ) { multiSampleCalls = true; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFCombine.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFCombine.java index 1ab6d79ad..21f9cbe5a 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFCombine.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFCombine.java @@ -25,10 +25,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.vcftools; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.Requires; @@ -136,12 +138,13 @@ public class VCFCombine extends RodWalker { return null; // get all of the vcf rods at this locus - ArrayList vcfRods = new ArrayList(); + Map vcfRods = new LinkedHashMap(); Iterator rods = tracker.getAllRods().iterator(); while (rods.hasNext()) { - Object rod = rods.next().getUnderlyingObject(); - if ( rod instanceof RodVCF ) - vcfRods.add((RodVCF)rod); + GATKFeature feat = rods.next(); + Object rod = feat.getUnderlyingObject(); + if ( rod instanceof VCFRecord ) + vcfRods.put((VCFRecord)rod,feat.getName()); } if ( vcfRods.size() == 0 ) @@ -166,18 +169,20 @@ public class VCFCombine extends RodWalker { return vcfWriter; } - private VCFRecord vcfUnion(ArrayList rods) { + private VCFRecord vcfUnion(Map rods) { if ( priority == null ) - return rods.get(0).mCurrentRecord; + return rods.keySet().iterator().next(); if ( annotateUnion ) { - Map rodMap = new HashMap(); - for ( RodVCF vcf : rods ) { rodMap.put(vcf.getName(), vcf); } + Map rodMap = new HashMap(); + for ( VCFRecord vcf : rods.keySet() ) { + rodMap.put(rods.get(vcf),vcf); + } String priority1 = priority[0]; String priority2 = priority[1]; - VCFRecord vcf1 = rodMap.containsKey(priority1) ? rodMap.get(priority1).getRecord() : null; - VCFRecord vcf2 = rodMap.containsKey(priority2) ? rodMap.get(priority2).getRecord() : null; + VCFRecord vcf1 = rodMap.containsKey(priority1) ? rodMap.get(priority1) : null; + VCFRecord vcf2 = rodMap.containsKey(priority2) ? rodMap.get(priority2) : null; // for simplicity, we are setting set and call for vcf1 String set = priority1; @@ -213,9 +218,9 @@ public class VCFCombine extends RodWalker { return call; } else { for ( String rodname : priority ) { - for ( RodVCF rod : rods ) { - if ( rod.getName().equals(rodname) ) - return rod.mCurrentRecord; + for ( VCFRecord rod : rods.keySet() ) { + if ( rods.get(rod).equals(rodname) ) + return rod; } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectWalker.java index 7ea122dd4..ef24b51fa 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSelectWalker.java @@ -29,10 +29,10 @@ import org.apache.commons.jexl.Expression; import org.apache.commons.jexl.ExpressionFactory; import org.apache.commons.jexl.JexlContext; import org.apache.commons.jexl.JexlHelper; +import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.walkers.RMD; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.RodWalker; @@ -45,7 +45,7 @@ import java.util.*; /** * Selects variant calls for output from a user-supplied VCF file using a number of user-selectable, parameterizable criteria. */ -@Requires(value={},referenceMetaData=@RMD(name="variant",type= RodVCF.class)) +@Requires(value={},referenceMetaData=@RMD(name="variant",type= VCFCodec.class)) public class VCFSelectWalker extends RodWalker { @Argument(fullName="match", shortName="match", doc="Expression used with INFO fields to select VCF records for inclusion in the output VCF(see wiki docs for more info)", required=false) protected String[] MATCH_STRINGS = new String[]{null}; @@ -66,7 +66,7 @@ public class VCFSelectWalker extends RodWalker { private List matchExpressions = new ArrayList(); - private void initializeVcfWriter(RodVCF rod) { + private void initializeVcfWriter(VCFRecord record) { // setup the header fields Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); @@ -78,7 +78,7 @@ public class VCFSelectWalker extends RodWalker { } writer = new VCFWriter(out); - writer.writeHeader(new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples())); + writer.writeHeader(new VCFHeader(hInfo, record.getHeader().getGenotypeSamples())); } public void initialize() { @@ -108,15 +108,15 @@ public class VCFSelectWalker extends RodWalker { if ( tracker == null ) return 0; - RodVCF variant = tracker.lookup("variant",RodVCF.class); + VCFRecord variant = tracker.lookup("variant",VCFRecord.class); // ignore places where we don't have a variant if ( variant == null ) return 0; boolean someoneMatched = false; for ( MatchExp exp : matchExpressions ) { - Map infoMap = new HashMap(variant.mCurrentRecord.getInfoValues()); - infoMap.put("QUAL", String.valueOf(variant.mCurrentRecord.getQual())); + Map infoMap = new HashMap(variant.getInfoValues()); + infoMap.put("QUAL", String.valueOf(variant.getQual())); JexlContext jContext = JexlHelper.createContext(); jContext.setVars(infoMap); @@ -139,13 +139,11 @@ public class VCFSelectWalker extends RodWalker { return 1; } - private void writeVCF(RodVCF variant) { - VCFRecord rec = variant.mCurrentRecord; - + private void writeVCF(VCFRecord variant) { if ( writer == null ) initializeVcfWriter(variant); - writer.addRecord(rec); + writer.addRecord(variant); } public Integer reduce(Integer value, Integer sum) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java index 0dff1a011..24a735422 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java @@ -25,14 +25,15 @@ package org.broadinstitute.sting.playground.gatk.walkers.vcftools; +import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.genotype.vcf.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import java.io.File; import java.util.ArrayList; @@ -80,9 +81,8 @@ public class VCFSubsetWalker extends RodWalker, VCFWriter> if (tracker != null) { for (GATKFeature feature : tracker.getAllRods()) { Object rod = feature.getUnderlyingObject(); - if (rod instanceof RodVCF) { - RodVCF vcfrod = (RodVCF) rod; - VCFRecord record = vcfrod.mCurrentRecord; + if (rod instanceof VCFRecord) { + VCFRecord vcfrod = (VCFRecord) rod; if (SAMPLES == null) { SAMPLES = new HashSet(); @@ -95,7 +95,7 @@ public class VCFSubsetWalker extends RodWalker, VCFWriter> //out.println(record.toStringEncoding(vcfrod.getHeader())); - records.add(record); + records.add(vcfrod); } } } @@ -129,8 +129,8 @@ public class VCFSubsetWalker extends RodWalker, VCFWriter> } VCFRecord subset = new VCFRecord(record.getReference(), - record.getLocation().getContig(), - (int) record.getLocation().getStart(), + record.getChr(), + record.getStart(), record.getID(), genotypeEncodings, record.getQual(), diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFApplyCuts.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFApplyCuts.java index ce80e2e2f..8c1bc4c1e 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFApplyCuts.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFApplyCuts.java @@ -27,6 +27,8 @@ package org.broadinstitute.sting.playground.tools.vcf; //import org.broadinstitute.sting.playground.tools.vcf.VCFOptimize.Cut; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Argument; @@ -152,7 +154,7 @@ class VCFApplyCuts extends CommandLineProgram VCFReader reader = null; - if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); } + if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); } else { reader = new VCFReader(new File(filename)); } VCFHeader header = reader.getHeader(); diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java index 72909bed2..6e5d441c1 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java @@ -24,15 +24,18 @@ */ package org.broadinstitute.sting.playground.tools.vcf; +import org.broad.tribble.vcf.VCFGenotypeEncoding; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.utils.genotype.vcf.*; - import java.io.*; import java.util.*; import net.sf.picard.util.Interval; +import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; class VCFCallRates extends CommandLineProgram @@ -62,7 +65,7 @@ class VCFCallRates extends CommandLineProgram if (autocorrect) { - reader = new VCFReader(VCFHomogenizer.create(filename)); + reader = new VCFReader(new File(filename),new VCFHomogenizer()); } else { diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFHomogenizer.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFHomogenizer.java index 6896e4231..9e8de6d28 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFHomogenizer.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFHomogenizer.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.playground.tools.vcf; +import org.broad.tribble.vcf.VCFCodec; + import java.io.*; import java.util.zip.*; @@ -34,93 +36,10 @@ import java.util.zip.*; * @author jmaguire */ -class VCFHomogenizer extends InputStream -{ - private BufferedReader in = null; - private String currentLine; - private ByteArrayInputStream stream; - - public VCFHomogenizer(Reader in) - { - this.in = (BufferedReader)in; - currentLine = this.readLine(); - stream = new ByteArrayInputStream(currentLine.getBytes()); - } - - //////////////////////////////////////// - // InputStream methods - - public int available() - { - return 1; - } - - public void close() throws java.io.IOException - { - this.in.close(); - } - - public void mark(int readlimit) - { - System.err.println("not implemented"); - System.exit(-1); - } - - public void reset() - { - System.err.println("not implemented"); - System.exit(-1); - } - - public boolean markSupported() - { - return false; - } - - public int read() - { - if ((stream == null) || (stream.available() == 0)) - { - currentLine = this.readLine(); - if (currentLine == null) { return -1; } - stream = new ByteArrayInputStream(currentLine.getBytes()); - } - return stream.read(); - } - - // END InputStream methods - ///////////////////////////////////// - - - public static VCFHomogenizer create(String filename) - { - try - { - if (filename.endsWith(".gz")) - { - return new VCFHomogenizer(new BufferedReader( - new InputStreamReader( - new GZIPInputStream( - new FileInputStream(filename))))); - } - else - { - return new VCFHomogenizer(new BufferedReader( - new InputStreamReader( - new FileInputStream(filename)))); - } - } - catch (Exception e) - { - e.printStackTrace(); - System.exit(-1); - } - - return null; - } +class VCFHomogenizer implements VCFCodec.LineTransform { //my ($chr, $off, $id, $ref, $alt, $qual, $filter, $info, $format, @genotypes) = @tokens; - private String editLine(String input) + public String lineTransform(String input) { if (input == null) { return null; } @@ -228,47 +147,6 @@ class VCFHomogenizer extends InputStream return output; } - - public String readLine() - { - try - { - String line = in.readLine(); - if (line == null) { return null; } - else { return editLine(line + "\n"); } - } - catch (Exception e) - { - e.printStackTrace(); - System.exit(-1); - } - - return null; - } - - /* - public int read() - { - throw new RuntimeException("VCFHomogenizer.read() not implemented."); - } - - public byte read(byte[] b) - { - throw new RuntimeException("VCFHomogenizer.read(byte[]) not implemented."); - } - - public int read(byte[] b, int off, int len) - { - throw new RuntimeException("VCFHomogenizer.read(byte[], int, int) not implemented."); - } - - public long skip(long n) - { - throw new RuntimeException("VCFHomogenizer.skip(long) not implemented."); - } - - public boolean markSupported() { return false; } - */ } diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFMerge.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFMerge.java index 27adad79e..a9a258818 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFMerge.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFMerge.java @@ -24,6 +24,8 @@ */ package org.broadinstitute.sting.playground.tools.vcf; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Argument; @@ -63,8 +65,8 @@ class VCFMerge extends CommandLineProgram if (autocorrect) { - reader1 = new VCFReader(VCFHomogenizer.create(filename1)); - reader2 = new VCFReader(VCFHomogenizer.create(filename2)); + reader1 = new VCFReader(new File(filename1),new VCFHomogenizer()); + reader2 = new VCFReader(new File(filename2),new VCFHomogenizer()); } else { diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFOptimize.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFOptimize.java index 588598177..eeefbb842 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFOptimize.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFOptimize.java @@ -24,10 +24,11 @@ */ package org.broadinstitute.sting.playground.tools.vcf; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Argument; - -import org.broadinstitute.sting.utils.genotype.vcf.*; +import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; import java.io.*; @@ -306,7 +307,7 @@ class VCFOptimize extends CommandLineProgram VCFReader reader = null; - if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); } + if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); } else { reader = new VCFReader(new File(filename)); } PrintWriter output = null; diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java index eb1d23017..3e04ac2bb 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java @@ -24,14 +24,15 @@ */ package org.broadinstitute.sting.playground.tools.vcf; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.utils.genotype.vcf.*; - import java.io.*; import net.sf.picard.util.Interval; +import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; class VCFSequenomAnalysis extends CommandLineProgram @@ -66,8 +67,8 @@ class VCFSequenomAnalysis extends CommandLineProgram if (autocorrect) { - reader1 = new VCFReader(VCFHomogenizer.create(filename1)); - reader2 = new VCFReader(VCFHomogenizer.create(filename2)); + reader1 = new VCFReader(new File(filename1),new VCFHomogenizer()); + reader2 = new VCFReader(new File(filename2),new VCFHomogenizer()); } else { diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java index 118fc8cff..262e9a42a 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java @@ -24,16 +24,19 @@ */ package org.broadinstitute.sting.playground.tools.vcf; +import org.broad.tribble.vcf.VCFGenotypeEncoding; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.utils.genotype.vcf.*; - import java.io.*; import java.util.*; import java.lang.*; import net.sf.picard.util.Interval; +import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; class VCFSequenomAnalysis2 extends CommandLineProgram @@ -68,8 +71,8 @@ class VCFSequenomAnalysis2 extends CommandLineProgram if (autocorrect) { - reader1 = new VCFReader(VCFHomogenizer.create(filename1)); - reader2 = new VCFReader(VCFHomogenizer.create(filename2)); + reader1 = new VCFReader(new File(filename1),new VCFHomogenizer()); + reader2 = new VCFReader(new File(filename2),new VCFHomogenizer()); } else { diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java index 9a53f17d0..c6a436eac 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java +++ b/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java @@ -24,6 +24,10 @@ */ package org.broadinstitute.sting.playground.tools.vcf; +import org.broad.tribble.vcf.VCFGenotypeEncoding; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.Argument; @@ -65,7 +69,7 @@ class VCFValidate extends CommandLineProgram VCFReader reader = null; - if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); } + if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); } else { reader = new VCFReader(new File(filename)); } VCFHeader header = reader.getHeader(); @@ -123,8 +127,10 @@ class VCFStats extends CommandLineProgram String stop = tokens[2]; Interval locus = new Interval(chr, Integer.parseInt(start), Integer.parseInt(stop)); - if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); } - else { reader = new VCFReader(new File(in_filename)); } + if (autocorrect) + reader = new VCFReader(new File(in_filename),new VCFHomogenizer()); + else + reader = new VCFReader(new File(in_filename)); VCFHeader header = reader.getHeader(); @@ -258,7 +264,7 @@ class CheckRefFields extends CommandLineProgram VCFReader reader = null; - if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); } + if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); } else { reader = new VCFReader(new File(filename)); } ReferenceSequenceFileWalker ref = new ReferenceSequenceFileWalker(new File(fasta_filename)); @@ -274,7 +280,7 @@ class CheckRefFields extends CommandLineProgram { VCFRecord record = reader.next(); - String chr = record.getLocation().getContig(); + String chr = record.getChr(); if (! chr.equals(ref_seq_name)) { System.out.println("Loading " + chr); @@ -282,7 +288,7 @@ class CheckRefFields extends CommandLineProgram ref_seq_name = chr; } - long offset = record.getLocation().getStart(); + long offset = record.getStart(); char vcf_ref_base = record.getReference().charAt(0); char fasta_ref_base = (char)ref_seq[(int)offset-1]; @@ -319,7 +325,7 @@ class FixRefFields extends CommandLineProgram VCFReader reader = null; - if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(filename)); } + if (autocorrect) { reader = new VCFReader(new File(filename),new VCFHomogenizer()); } else { reader = new VCFReader(new File(filename)); } ReferenceSequenceFileWalker ref = new ReferenceSequenceFileWalker(new File(fasta_filename)); @@ -349,7 +355,7 @@ class FixRefFields extends CommandLineProgram { VCFRecord record = reader.next(); - String chr = record.getLocation().getContig(); + String chr = record.getChr(); if (! chr.equals(ref_seq_name)) { System.out.println("Loading " + chr); @@ -357,7 +363,7 @@ class FixRefFields extends CommandLineProgram ref_seq_name = chr; } - long offset = record.getLocation().getStart(); + long offset = record.getStart(); char vcf_ref_base = record.getReference().charAt(0); char fasta_ref_base = (char)ref_seq[(int)offset-1]; @@ -498,7 +504,7 @@ class VCFGrep_old extends CommandLineProgram throw new RuntimeException(e); } - if (autocorrect) { reader = new VCFReader(VCFHomogenizer.create(in_filename)); } + if (autocorrect) { reader = new VCFReader(new File(in_filename),new VCFHomogenizer()); } else { reader = new VCFReader(new File(in_filename)); } @@ -528,7 +534,7 @@ class PrintGQ extends CommandLineProgram VCFReader reader; VCFReader reader2; - reader = new VCFReader(VCFHomogenizer.create(filename)); + reader = new VCFReader(new File(filename),new VCFHomogenizer()); VCFHeader header = reader.getHeader(); VCFRecord record = reader.next(); @@ -604,7 +610,7 @@ class VCFSimpleStats extends CommandLineProgram if (autocorrect) { - reader1 = new VCFReader(VCFHomogenizer.create(filename1)); + reader1 = new VCFReader(new File(filename1),new VCFHomogenizer()); } else { @@ -819,8 +825,8 @@ class VCFConcordance extends CommandLineProgram if (autocorrect) { - reader1 = new VCFReader(VCFHomogenizer.create(filename1)); - reader2 = new VCFReader(VCFHomogenizer.create(filename2)); + reader1 = new VCFReader(new File(filename1),new VCFHomogenizer()); + reader2 = new VCFReader(new File(filename2),new VCFHomogenizer()); } else { @@ -1266,8 +1272,8 @@ public class VCFTool public static Interval getIntervalFromRecord(VCFRecord record) { - String chr = record.getLocation().getContig(); - long off = record.getLocation().getStart(); + String chr = record.getChr(); + long off = record.getStart(); return new Interval(chr, (int)off, (int)off); } diff --git a/java/src/org/broadinstitute/sting/utils/SampleUtils.java b/java/src/org/broadinstitute/sting/utils/SampleUtils.java index 1b330476d..1a1cb42bc 100755 --- a/java/src/org/broadinstitute/sting/utils/SampleUtils.java +++ b/java/src/org/broadinstitute/sting/utils/SampleUtils.java @@ -27,12 +27,13 @@ package org.broadinstitute.sting.utils; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; +import org.broad.tribble.vcf.VCFCodec; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; -import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; import java.util.*; @@ -79,7 +80,7 @@ public class SampleUtils { List dataSources = toolkit.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getType().equals(RodVCF.class) ) { + if ( rod.getType().equals(VCFRecord.class) ) { VCFReader reader = new VCFReader(rod.getFile()); samples.addAll(reader.getHeader().getGenotypeSamples()); reader.close(); @@ -108,7 +109,7 @@ public class SampleUtils { List dataSources = toolkit.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getType().equals(RodVCF.class) ) { + if ( rod.getType().equals(VCFCodec.class) ) { VCFReader reader = new VCFReader(rod.getFile()); Set vcfSamples = reader.getHeader().getGenotypeSamples(); for ( String sample : vcfSamples ) diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index 3f83dd42c..ef2f91e08 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.genotype; import net.sf.samtools.SAMFileHeader; +import org.broad.tribble.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.geli.*; import org.broadinstitute.sting.utils.genotype.glf.*; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 3923860ea..ad16f6275 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BlockCompressedOutputStream; +import org.broad.tribble.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -10,7 +11,6 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.genotype.IndelLikelihood; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFilterHeaderLine.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFilterHeaderLine.java deleted file mode 100755 index c50d8f824..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFilterHeaderLine.java +++ /dev/null @@ -1,60 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broadinstitute.sting.utils.Utils; - - -/** - * @author ebanks - *

- * Class VCFFilterHeaderLine - *

- * A class representing a key=value entry for FILTER fields in the VCF header - */ -public class VCFFilterHeaderLine extends VCFHeaderLine { - - private String mName; - private String mDescription; - - - /** - * create a VCF filter header line - * - * @param name the name for this header line - * @param description the description for this header line - */ - public VCFFilterHeaderLine(String name, String description) { - super("FILTER", ""); - mName = name; - mDescription = description; - } - - /** - * create a VCF info header line - * - * @param line the header line - */ - protected VCFFilterHeaderLine(String line) { - super("FILTER", ""); - String[] pieces = line.split(","); - if ( pieces.length < 2 ) - throw new IllegalArgumentException("There are too few values in the VCF FILTER header line: " + line); - - mName = pieces[0]; - mDescription = Utils.trim(pieces[1], '"'); - // just in case there were some commas in the description - for (int i = 1; i < pieces.length; i++) - mDescription += "," + Utils.trim(pieces[i], '"'); - } - - protected String makeStringRep() { - return String.format("FILTER=%s,\"%s\"", mName, mDescription); - } - - public boolean equals(Object o) { - if ( !(o instanceof VCFFilterHeaderLine) ) - return false; - VCFFilterHeaderLine other = (VCFFilterHeaderLine)o; - return mName.equals(other.mName) && - mDescription.equals(other.mDescription); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFormatHeaderLine.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFormatHeaderLine.java deleted file mode 100755 index 577fff133..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFFormatHeaderLine.java +++ /dev/null @@ -1,80 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broadinstitute.sting.utils.Utils; - - -/** - * @author ebanks - *

- * Class VCFFormatHeaderLine - *

- * A class representing a key=value entry for genotype FORMAT fields in the VCF header - */ -public class VCFFormatHeaderLine extends VCFHeaderLine { - - // the format field types - public enum FORMAT_TYPE { - Integer, Float, String - } - - private String mName; - private int mCount; - private String mDescription; - private FORMAT_TYPE mType; - - - /** - * create a VCF format header line - * - * @param name the name for this header line - * @param count the count for this header line - * @param type the type for this header line - * @param description the description for this header line - */ - public VCFFormatHeaderLine(String name, int count, FORMAT_TYPE type, String description) { - super("FORMAT", ""); - mName = name; - mCount = count; - mType = type; - mDescription = description; - } - - /** - * create a VCF format header line - * - * @param line the header line - */ - protected VCFFormatHeaderLine(String line) { - super("FORMAT", ""); - String[] pieces = line.split(","); - if ( pieces.length < 4 ) - throw new IllegalArgumentException("There are too few values in the VCF FORMAT header line: " + line); - - mName = pieces[0]; - mCount = Integer.valueOf(pieces[1]); - mType = FORMAT_TYPE.valueOf(pieces[2]); - mDescription = Utils.trim(pieces[3], '"'); - // just in case there were some commas in the description - for (int i = 4; i < pieces.length; i++) - mDescription += "," + Utils.trim(pieces[i], '"'); - } - - protected String makeStringRep() { - return String.format("FORMAT=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription); - } - - public String getName() { return mName; } - public int getCount() { return mCount; } - public String getDescription() { return mDescription; } - public FORMAT_TYPE getType() { return mType; } - - public boolean equals(Object o) { - if ( !(o instanceof VCFFormatHeaderLine) ) - return false; - VCFFormatHeaderLine other = (VCFFormatHeaderLine)o; - return mName.equals(other.mName) && - mCount == other.mCount && - mDescription.equals(other.mDescription) && - mType == other.mType; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java deleted file mode 100644 index abcf969c6..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java +++ /dev/null @@ -1,127 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - - -/** - * @author aaron - *

- * Class VCFGenotypeEncoding - *

- * basic encoding class for genotype fields in VCF - */ -public class VCFGenotypeEncoding { - public enum TYPE { - SINGLE_BASE, - INSERTION, - DELETION, - UNCALLED, - MIXED // this type is only valid in aggregate, not for a single VCFGenotypeEncoding - } - - // our length (0 for SINGLE_BASE), our bases, and our type - private final int mLength; - private final String mBases; - private final TYPE mType; - - // public constructor, that parses out the base string - public VCFGenotypeEncoding(String baseString) { - if ((baseString.length() == 1)) { - // are we an empty (no-call) genotype? - if (baseString.equals(VCFGenotypeRecord.EMPTY_ALLELE)) { - mBases = VCFGenotypeRecord.EMPTY_ALLELE; - mLength = 0; - mType = TYPE.UNCALLED; - } else if (!validBases(baseString)) { - throw new IllegalArgumentException("Alleles of length 1 must be one of A,C,G,T, " + baseString + " was passed in"); - } else { // we're a valid base - mBases = baseString.toUpperCase(); - mLength = 0; - mType = TYPE.SINGLE_BASE; - } - } else { // deletion or insertion - if (baseString.length() < 1 || (baseString.toUpperCase().charAt(0) != 'D' && baseString.toUpperCase().charAt(0) != 'I')) { - throw new IllegalArgumentException("Genotype encoding of " + baseString + " was passed in, but is not a valid deletion, insertion, base, or no call (.)"); - } - if (baseString.toUpperCase().charAt(0) == 'D') { - mLength = Integer.valueOf(baseString.substring(1, baseString.length())); - mBases = ""; - mType = TYPE.DELETION; - } else { // we're an I - mBases = baseString.substring(1, baseString.length()).toUpperCase(); - if (!validBases(mBases)) - throw new IllegalArgumentException("The insertion base string contained invalid bases -> " + baseString); - mLength = mBases.length(); - mType = TYPE.INSERTION; - } - } - } - - public int getLength() { - return mLength; - } - - public String getBases() { - return mBases; - } - - public TYPE getType() { - return mType; - } - - public boolean equals(Object obj) { - if ( obj == null ) - return false; - if ( obj instanceof VCFGenotypeEncoding ) { - VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj; - return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength); - } - if ( mType == TYPE.UNCALLED && obj.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) ) - return true; - return false; - } - - public int hashCode() { - // our underlying data is immutable, so this is safe (we won't strand a value in a hashtable somewhere - // when the data changes underneath, altering this value). - String str = this.mBases + String.valueOf(this.mLength) + this.mType.toString(); - return str.hashCode(); - } - - /** - * dump the string representation of this genotype encoding - * - * @return string representation - */ - public String toString() { - StringBuilder builder = new StringBuilder(); - switch (mType) { - case SINGLE_BASE: - case UNCALLED: - builder.append(mBases); - break; - case INSERTION: - builder.append("I"); - builder.append(mBases); - break; - case DELETION: - builder.append("D"); - builder.append(mLength); - break; - } - return builder.toString(); - } - - /** - * ensure that string contains valid bases - * - * @param bases the bases to check - * - * @return true if they're all either A,C,G,T; false otherwise - */ - private static boolean validBases(String bases) { - for (char c : bases.toUpperCase().toCharArray()) { - if (c != 'A' && c != 'C' && c != 'G' && c != 'T') - return false; - } - return true; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java deleted file mode 100644 index cb4681e65..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ /dev/null @@ -1,353 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broadinstitute.sting.utils.GenomeLoc; - -import java.util.*; - - -/** - * @author aaron - *

- * Class VCFGenotypeRecord - *

- */ -public class VCFGenotypeRecord { - - // key names - public static final String GENOTYPE_KEY = "GT"; - public static final String GENOTYPE_QUALITY_KEY = "GQ"; - public static final String DEPTH_KEY = "DP"; - public static final String HAPLOTYPE_QUALITY_KEY = "HQ"; - public static final String GENOTYPE_FILTER_KEY = "FT"; - public static final String GENOTYPE_POSTERIORS_TRIPLET_KEY = "GL"; - public static final String OLD_DEPTH_KEY = "RD"; - - // the values for empty fields - public static final String EMPTY_GENOTYPE = "./."; - public static final String EMPTY_ALLELE = "."; - public static final int MISSING_GENOTYPE_QUALITY = -1; - public static final int MISSING_DEPTH = -1; - public static final int MISSING_HAPLOTYPE_QUALITY = -1; - public static final String PASSES_FILTERS = "0"; - public static final String UNFILTERED = "."; - - public static final double MAX_QUAL_VALUE = 99.0; - - // what kind of phasing this genotype has - public enum PHASE { - UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN - } - - // our record - private VCFRecord mRecord; - - // our phasing - private PHASE mPhaseType; - - // our bases(s) - private final List mGenotypeAlleles = new ArrayList(); - - // our mapping of the format mFields to values - private final Map mFields = new HashMap(); - - // our sample name - private String mSampleName; - - /** - * Create a VCF genotype record - * - * @param sampleName sample name - * @param genotypes list of genotypes - * @param phasing phasing - */ - public VCFGenotypeRecord(String sampleName, List genotypes, PHASE phasing) { - mSampleName = sampleName; - if (genotypes != null) - this.mGenotypeAlleles.addAll(genotypes); - mPhaseType = phasing; - } - - public void setVCFRecord(VCFRecord record) { - mRecord = record; - } - - public void setSampleName(String name) { - mSampleName = name; - } - - /** - * Adds a field to the genotype record. - * Throws an exception if the key is GT, as that's computed internally. - * - * @param key the field name (use static variables above for common fields) - * @param value the field value - */ - public void setField(String key, String value) { - // make sure the GT field isn't being set - if ( key.equals(GENOTYPE_KEY) ) - throw new IllegalArgumentException("Setting the GT field is not allowed as that's done internally"); - mFields.put(key, value); - } - - /** - * determine the phase of the genotype - * - * @param phase the string that contains the phase character - * - * @return the phase - */ - static PHASE determinePhase(String phase) { - // find the phasing information - if (phase.equals("/")) - return PHASE.UNPHASED; - else if (phase.equals("|")) - return PHASE.PHASED; - else if (phase.equals("\\")) - return PHASE.PHASED_SWITCH_PROB; - else - throw new IllegalArgumentException("Unknown genotype phasing parameter"); - } - - - public PHASE getPhaseType() { - return mPhaseType; - } - - public String getSampleName() { - return mSampleName; - } - - public List getAlleles() { - return mGenotypeAlleles; - } - - public Map getFields() { - return mFields; - } - - /** - * @return the phred-scaled quality score - */ - public double getQual() { - return ( mFields.containsKey(GENOTYPE_QUALITY_KEY) ? Double.valueOf(mFields.get(GENOTYPE_QUALITY_KEY)) : MISSING_GENOTYPE_QUALITY); - } - - public boolean isMissingQual() { - return (int)getQual() == MISSING_GENOTYPE_QUALITY; - } - - public double getNegLog10PError() { - double qual = getQual(); - return (qual == MISSING_GENOTYPE_QUALITY ? MISSING_GENOTYPE_QUALITY : qual / 10.0); - } - - public int getReadCount() { - return ( mFields.containsKey(DEPTH_KEY) ? Integer.valueOf(mFields.get(DEPTH_KEY)) : MISSING_DEPTH); - } - - public GenomeLoc getLocation() { - return mRecord != null ? mRecord.getLocation() : null; - } - - public String getReference() { - return mRecord != null ? mRecord.getReference() : "N"; - } - - public String getBases() { - String genotype = ""; - for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) - genotype += encoding.getBases(); - return genotype; - } - - public boolean isVariant(char ref) { - for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) { - if ( encoding.getType() == VCFGenotypeEncoding.TYPE.UNCALLED ) - continue; - if ( encoding.getType() != VCFGenotypeEncoding.TYPE.SINGLE_BASE || - encoding.getBases().charAt(0) != ref ) - return true; - } - return false; - } - - public boolean isPointGenotype() { - return (mRecord != null ? !mRecord.isIndel() : true); - } - - public boolean isHom() { - if ( mGenotypeAlleles.size() == 0 ) - return true; - - String bases = mGenotypeAlleles.get(0).getBases(); - for ( int i = 1; i < mGenotypeAlleles.size(); i++ ) { - if ( !bases.equals(mGenotypeAlleles.get(1).getBases()) ) - return false; - } - return true; - } - - public boolean isHet() { - return !isHom(); - } - - public boolean isNoCall() { - for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) { - if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED ) - return false; - } - return true; - } - - public boolean isFiltered() { - return ( mFields.get(GENOTYPE_FILTER_KEY) != null && - !mFields.get(GENOTYPE_FILTER_KEY).equals(UNFILTERED) && - !mFields.get(GENOTYPE_FILTER_KEY).equals(PASSES_FILTERS)); - } - - public int getPloidy() { - return 2; - } - - public VCFRecord getRecord() { - return mRecord; - } - - private String toGenotypeString(List altAlleles) { - String str = ""; - boolean first = true; - for (VCFGenotypeEncoding allele : mGenotypeAlleles) { - if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED) - str += VCFGenotypeRecord.EMPTY_ALLELE; - else - str += String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0); - if (first) { - switch (mPhaseType) { - case UNPHASED: - str += "/"; - break; - case PHASED: - str += "|"; - break; - case PHASED_SWITCH_PROB: - str += "\\"; - break; - case UNKNOWN: - throw new UnsupportedOperationException("Unknown phase type"); - } - first = false; - } - } - return str; - - } - - @Override - public String toString() { - return String.format("[VCFGenotype %s %s %s %s]", getLocation(), mSampleName, this.mGenotypeAlleles, mFields); - } - - public boolean isEmptyGenotype() { - for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) { - if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED ) - return false; - } - return true; - } - - public boolean equals(Object other) { - if (other instanceof VCFGenotypeRecord) { - if (((VCFGenotypeRecord) other).mPhaseType != this.mPhaseType) return false; - if (!((VCFGenotypeRecord) other).mGenotypeAlleles.equals(this.mGenotypeAlleles)) return false; - if (!((VCFGenotypeRecord) other).mFields.equals(mFields)) return false; - if (!((VCFGenotypeRecord) other).mSampleName.equals(this.mSampleName)) return false; - return true; - } - return false; - } - - /** - * output a string representation of the VCFGenotypeRecord, given the alternate alleles - * - * @param altAlleles the alternate alleles, needed for toGenotypeString() - * @param genotypeFormatStrings genotype format strings - * - * @return a string - */ - public String toStringEncoding(List altAlleles, String[] genotypeFormatStrings) { - StringBuilder builder = new StringBuilder(); - builder.append(toGenotypeString(altAlleles)); - - for ( String field : genotypeFormatStrings ) { - if ( field.equals(GENOTYPE_KEY) ) - continue; - - String value = mFields.get(field); - if ( value == null && field.equals(OLD_DEPTH_KEY) ) - value = mFields.get(DEPTH_KEY); - - builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); - if ( value == null || value.equals("") ) - builder.append(getMissingFieldValue(field)); - else - builder.append(value); - } - - return builder.toString(); - } - - /** - * output a string representation of an empty genotype - * - * @param genotypeFormatStrings genotype format strings - * - * @return a string - */ - public static String stringEncodingForEmptyGenotype(String[] genotypeFormatStrings) { - StringBuilder builder = new StringBuilder(); - builder.append(EMPTY_GENOTYPE); - - for ( String field : genotypeFormatStrings ) { - if ( field.equals(GENOTYPE_KEY) ) - continue; - - builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); - builder.append(getMissingFieldValue(field)); - } - - return builder.toString(); - } - - public static String getMissingFieldValue(String field) { - String result = ""; - if ( field.equals(GENOTYPE_QUALITY_KEY) ) - result = String.valueOf(MISSING_GENOTYPE_QUALITY); - else if ( field.equals(DEPTH_KEY) || field.equals(OLD_DEPTH_KEY) ) - result = String.valueOf(MISSING_DEPTH); - else if ( field.equals(GENOTYPE_FILTER_KEY) ) - result = UNFILTERED; - else if ( field.equals(GENOTYPE_POSTERIORS_TRIPLET_KEY) ) - result = "0,0,0"; - // TODO -- support haplotype quality - //else if ( field.equals(HAPLOTYPE_QUALITY_KEY) ) - // result = String.valueOf(MISSING_HAPLOTYPE_QUALITY); - return result; - } - - public static Set getSupportedHeaderStrings() { - Set result = new HashSet(); - result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.String, "Genotype")); - result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.Float, "Genotype Quality")); - result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.FORMAT_TYPE.Integer, "Read Depth (only filtered reads used for calling)")); - result.add(new VCFFormatHeaderLine(GENOTYPE_POSTERIORS_TRIPLET_KEY, 3, VCFFormatHeaderLine.FORMAT_TYPE.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic")); - //result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality")); - return result; - } - - public void replaceFields(HashMap newFields) { - mFields.clear(); - for ( String s : newFields.keySet() ) { - mFields.put(s,newFields.get(s)); - } - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java index 1c19b77a3..a7f705272 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import java.util.Set; @@ -24,7 +26,7 @@ public interface VCFGenotypeWriter extends GenotypeWriter { * Add a given VCF record to the given output. * @param vcfRecord Record to add. */ - public void addRecord(VCFRecord vcfRecord); + public void addRecord(VCFRecord vcfRecord); /** * set the validation stringency diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index d6909692e..201e2f954 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -1,5 +1,9 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.vcf.VCFFormatHeaderLine; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; @@ -64,7 +68,7 @@ public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { // set up the allowed genotype format fields allowedGenotypeFormatStrings = new ArrayList(); for ( VCFHeaderLine field : headerInfo ) { - if ( field instanceof VCFFormatHeaderLine ) + if ( field instanceof VCFFormatHeaderLine) allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName()); } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java deleted file mode 100644 index 6fb2b7a8b..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java +++ /dev/null @@ -1,151 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import java.util.*; - - -/** - * @author aaron - *

- * Class VCFHeader - *

- * A class representing the VCF header - */ -public class VCFHeader { - - public static final String FILE_FORMAT_KEY = "fileformat"; - public static final String OLD_FILE_FORMAT_KEY = "format"; // from version 3.2 - - - /** the current vcf version we support. */ - public static final String VCF_VERSION_HEADER = "VCFv"; - public static final String OLD_VCF_VERSION_HEADER = "VCRv"; // from version 3.2 - public static final double VCF_VERSION_NUMBER = 3.3; - public static final String VCF_VERSION = VCF_VERSION_HEADER + VCF_VERSION_NUMBER; - - - // the manditory header fields - public enum HEADER_FIELDS { - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO - } - - // the associated meta data - private final Set mMetaData; - - // the list of auxillary tags - private final Set mGenotypeSampleNames = new LinkedHashSet(); - - // the character string that indicates meta data - public static final String METADATA_INDICATOR = "##"; - - // the header string indicator - public static final String HEADER_INDICATOR = "#"; - - - /** do we have genotying data? */ - private boolean hasGenotypingData = false; - - /** - * create a VCF header, given a list of meta data and auxillary tags - * - * @param metaData the meta data associated with this header - */ - public VCFHeader(Set metaData) { - mMetaData = new TreeSet(metaData); - checkVCFVersion(); - } - - /** - * create a VCF header, given a list of meta data and auxillary tags - * - * @param metaData the meta data associated with this header - * @param genotypeSampleNames the genotype format field, and the sample names - */ - public VCFHeader(Set metaData, Set genotypeSampleNames) { - mMetaData = new TreeSet(metaData); - for (String col : genotypeSampleNames) { - if (!col.equals("FORMAT")) - mGenotypeSampleNames.add(col); - } - if (genotypeSampleNames.size() > 0) hasGenotypingData = true; - checkVCFVersion(); - } - - /** - * check our metadata for a VCF version tag, and throw an exception if the version is out of date - * or the version is not present - */ - public void checkVCFVersion() { - String version = null; - for ( VCFHeaderLine line : mMetaData ) { - if ( line.getKey().equals(FILE_FORMAT_KEY) || line.getKey().equals(OLD_FILE_FORMAT_KEY) ) { - version = line.getValue(); - break; - } - } - - if ( version == null ) - mMetaData.add(new VCFHeaderLine(FILE_FORMAT_KEY, VCF_VERSION)); - else if ( !isSupportedVersion(version) ) - throw new RuntimeException("VCF version " + version + - " is not yet supported; only version " + VCF_VERSION + " and earlier can be used"); - } - - private boolean isSupportedVersion(String version) { - if ( !version.startsWith(VCF_VERSION_HEADER) && !version.startsWith(OLD_VCF_VERSION_HEADER) ) - return false; - try { - double dVersion = Double.valueOf(version.substring(VCF_VERSION_HEADER.length())); - return dVersion <= VCF_VERSION_NUMBER; - } catch (Exception e) { } - - return false; - } - - /** - * get the header fields in order they're presented in the input file (which is now required to be - * the order presented in the spec). - * - * @return a set of the header fields, in order - */ - public Set getHeaderFields() { - Set fields = new LinkedHashSet(); - for (HEADER_FIELDS field : HEADER_FIELDS.values()) - fields.add(field); - return fields; - } - - /** - * get the meta data, associated with this header - * - * @return a set of the meta data - */ - public Set getMetaData() { - return mMetaData; - } - - /** - * get the genotyping sample names - * - * @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false - */ - public Set getGenotypeSamples() { - return mGenotypeSampleNames; - } - - /** - * do we have genotyping data? - * - * @return true if we have genotyping columns, false otherwise - */ - public boolean hasGenotypingData() { - return hasGenotypingData; - } - - /** @return the column count, */ - public int getColumnCount() { - return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0); - } -} - - - diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderLine.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderLine.java deleted file mode 100755 index b89c9d80b..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderLine.java +++ /dev/null @@ -1,86 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - - -/** - * @author ebanks - *

- * Class VCFHeaderLine - *

- * A class representing a key=value entry in the VCF header - */ -public class VCFHeaderLine implements Comparable { - - private String stringRep = null; - private String mKey = null; - private String mValue = null; - - - /** - * create a VCF header line - * - * @param key the key for this header line - * @param value the value for this header line - */ - public VCFHeaderLine(String key, String value) { - mKey = key; - mValue = value; - } - - /** - * Get the key - * - * @return the key - */ - public String getKey() { - return mKey; - } - - /** - * Set the key - * - * @param key the key for this header line - */ - public void setKey(String key) { - mKey = key; - stringRep = null; - } - - /** - * Get the value - * - * @return the value - */ - public String getValue() { - return mValue; - } - - /** - * Set the value - * - * @param value the value for this header line - */ - public void setValue(String value) { - mValue = value; - stringRep = null; - } - - public String toString() { - if ( stringRep == null ) - stringRep = makeStringRep(); - return stringRep; - } - - protected String makeStringRep() { - return mKey + "=" + mValue; - } - - public boolean equals(Object o) { - if ( !(o instanceof VCFHeaderLine) ) - return false; - return mKey.equals(((VCFHeaderLine)o).getKey()) && mValue.equals(((VCFHeaderLine)o).getValue()); - } - - public int compareTo(Object other) { - return toString().compareTo(other.toString()); - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFInfoHeaderLine.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFInfoHeaderLine.java deleted file mode 100755 index 11a49f9a0..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFInfoHeaderLine.java +++ /dev/null @@ -1,75 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broadinstitute.sting.utils.Utils; - - -/** - * @author ebanks - *

- * Class VCFInfoHeaderLine - *

- * A class representing a key=value entry for INFO fields in the VCF header - */ -public class VCFInfoHeaderLine extends VCFHeaderLine { - - // the info field types - public enum INFO_TYPE { - Integer, Float, String, Character, Flag - } - - private String mName; - private int mCount; - private String mDescription; - private INFO_TYPE mType; - - - /** - * create a VCF info header line - * - * @param name the name for this header line - * @param count the count for this header line - * @param type the type for this header line - * @param description the description for this header line - */ - public VCFInfoHeaderLine(String name, int count, INFO_TYPE type, String description) { - super("INFO", ""); - mName = name; - mCount = count; - mType = type; - mDescription = description; - } - - /** - * create a VCF info header line - * - * @param line the header line - */ - protected VCFInfoHeaderLine(String line) { - super("INFO", ""); - String[] pieces = line.split(","); - if ( pieces.length < 4 ) - throw new IllegalArgumentException("There are too few values in the VCF INFO header line: " + line); - - mName = pieces[0]; - mCount = Integer.valueOf(pieces[1]); - mType = INFO_TYPE.valueOf(pieces[2]); - mDescription = Utils.trim(pieces[3], '"'); - // just in case there were some commas in the description - for (int i = 4; i < pieces.length; i++) - mDescription += "," + Utils.trim(pieces[i], '"'); - } - - protected String makeStringRep() { - return String.format("INFO=%s,%d,%s,\"%s\"", mName, mCount, mType.toString(), mDescription); - } - - public boolean equals(Object o) { - if ( !(o instanceof VCFInfoHeaderLine) ) - return false; - VCFInfoHeaderLine other = (VCFInfoHeaderLine)o; - return mName.equals(other.mName) && - mCount == other.mCount && - mDescription.equals(other.mDescription) && - mType == other.mType; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java index ce9c5a4f4..9e8d7c7b5 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java @@ -1,5 +1,8 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.vcf.VCFGenotypeEncoding; +import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index 99edd48db..b844fe4e7 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -20,6 +20,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.GZIPInputStream; +import org.broad.tribble.FeatureReader; +import org.broad.tribble.vcf.*; +import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; /** The VCFReader class, which given a valid vcf file, parses out the header and VCF records */ @@ -28,14 +31,11 @@ public class VCFReader implements Iterator, Iterable { // our VCF header private VCFHeader mHeader; - // our buffered input stream - private BufferedReader mReader; + // our iterator + private Iterator iterator; - // our next record - private VCFRecord mNextRecord = null; - - // our pattern matching for the genotype mFields - private static final Pattern gtPattern = Pattern.compile("([0-9\\.]+)([\\\\|\\/])([0-9\\.]*)"); + // our feature reader; so we can close it + private FeatureReader vcfReader = null; /** * Create a VCF reader, given a VCF file @@ -43,89 +43,36 @@ public class VCFReader implements Iterator, Iterable { * @param vcfFile the vcf file to write */ public VCFReader(File vcfFile) { - if (vcfFile.getName().endsWith(".gz")) - openGZipFile(vcfFile); - else - openTextVersion(vcfFile); - - this.parseHeader(); + initialize(vcfFile, null); } /** - * Create a VCF reader, given a stream. + * Create a VCF reader, given a VCF file * - * @param stream the stream file read + * @param vcfFile the vcf file to write */ - public VCFReader (InputStream stream) - { - mReader = new BufferedReader( - new InputStreamReader( - stream, - Charset.forName("UTF-8"))); - this.parseHeader(); - } - - /** - * parse the header from the top of the file. We read lines that match - * the header pattern, and try to create a header. We then create the first - * record from the first non-header line in the file. - */ - private void parseHeader() { - String line = null; - // try and parse the header - try { - ArrayList lines = new ArrayList(); - line = mReader.readLine(); - while (line != null && line.startsWith("#")) { - lines.add(line); - line = mReader.readLine(); - } - // try to make a header from the lines we parsed - mHeader = this.createHeader(lines); - - // if the last line we parsed is null, we shouldn't try to make a record of it - if (line != null) mNextRecord = createRecord(line, mHeader); - } catch (IOException e) { - throw new RuntimeException("VCFReader: Failed to parse VCF File on line: " + line, e); - } + public VCFReader(File vcfFile, VCFCodec.LineTransform transform) { + initialize(vcfFile, transform); } - /** - * open a g-zipped version of the VCF format - * - * @param vcfGZipFile the file to open - */ - private void openGZipFile(File vcfGZipFile) { + private void initialize(File vcfFile, VCFCodec.LineTransform transform) { + VCFCodec codec = new VCFCodec(); + if (transform != null) codec.setTransformer(transform); try { - mReader = new BufferedReader( - new InputStreamReader(new GZIPInputStream( - new FileInputStream(vcfGZipFile)))); + vcfReader = new FeatureReader(vcfFile,codec); + iterator= vcfReader.iterator(); } catch (FileNotFoundException e) { - throw new RuntimeException("VCFReader: Unable to find VCF file: " + vcfGZipFile, e); + throw new StingException("Unable to read VCF File from " + vcfFile, e); } catch (IOException e) { - throw new RuntimeException("VCFReader: A problem occured trying to open the file using the gzipped decompressor, filename: " + vcfGZipFile, e); + throw new StingException("Unable to read VCF File from " + vcfFile, e); } + mHeader = codec.getHeader(); } - /** - * open the vcf file as a text file - * - * @param vcfFile the vcf file name - */ - private void openTextVersion(File vcfFile) { - try { - mReader = new BufferedReader( - new InputStreamReader( - new FileInputStream(vcfFile), - Charset.forName("UTF-8"))); - } catch (FileNotFoundException e) { - throw new RuntimeException("VCFReader: Unable to find VCF text file: " + vcfFile, e); - } - } /** @return true if we have another VCF record to return */ public boolean hasNext() { - return (mNextRecord != null); + return (iterator.hasNext()); } /** @@ -134,15 +81,7 @@ public class VCFReader implements Iterator, Iterable { * @return a VCFRecord, representing the next record in the file */ public VCFRecord next() { - VCFRecord rec = mNextRecord; - try { - String line = mReader.readLine(); - if (line == null) mNextRecord = null; - else mNextRecord = createRecord(line, mHeader); - } catch (IOException e) { - mNextRecord = null; - } - return rec; + return iterator.next(); } /** Remove is not supported */ @@ -150,196 +89,6 @@ public class VCFReader implements Iterator, Iterable { throw new UnsupportedOperationException("Unsupported operation"); } - /** - * create a VCF header, given an array of strings that all start with at least the # character. This function is - * package protected so that the VCFReader can access this function - * - * @param headerStrings a list of header strings - * - * @return a VCF Header created from the list of stinrgs - */ - protected VCFHeader createHeader(List headerStrings) { - Set metaData = new TreeSet(); - Set auxTags = new LinkedHashSet(); - // iterate over all the passed in strings - for ( String str : headerStrings ) { - if ( !str.startsWith("##") ) { - String[] strings = str.substring(1).split("\t"); - // the columns should be in order according to Richard Durbin - int arrayIndex = 0; - for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) { - try { - if (field != VCFHeader.HEADER_FIELDS.valueOf(strings[arrayIndex])) - throw new RuntimeException("VCFReader: we were expecting column name " + field + " but we saw " + strings[arrayIndex]); - } catch (IllegalArgumentException e) { - throw new RuntimeException("VCFReader: Unknown column name \"" + strings[arrayIndex] + "\", it does not match a known column header name."); - } - arrayIndex++; - } - while (arrayIndex < strings.length) { - if (!strings[arrayIndex].equals("FORMAT")) - auxTags.add(strings[arrayIndex]); - arrayIndex++; - } - } else { - if ( str.startsWith("##INFO=") ) - metaData.add(new VCFInfoHeaderLine(str.substring(7))); - else if ( str.startsWith("##FILTER=") ) - metaData.add(new VCFFilterHeaderLine(str.substring(9))); - else if ( str.startsWith("##FORMAT=") ) - metaData.add(new VCFFormatHeaderLine(str.substring(9))); - else { - int equals = str.indexOf("="); - if ( equals != -1 ) - metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1))); - } - } - } - - return new VCFHeader(metaData, auxTags); - } - - /** - * create the next VCFRecord, given the input line - * - * @param line the line from the file - * @param mHeader the VCF header - * @return the VCFRecord - */ - public static VCFRecord createRecord(String line, VCFHeader mHeader) { - try { - // things we need to make a VCF record - Map values = new HashMap(); - String tokens[] = line.split("\t"); - - // check to ensure that the column count of tokens is right - if (tokens.length != mHeader.getColumnCount()) { - throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line); - } - - int index = 0; - for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields()) - values.put(field, tokens[index++]); - // if we have genotyping data, we try and extract the genotype fields - if (mHeader.hasGenotypingData()) { - String mFormatString = tokens[index]; - String keyStrings[] = mFormatString.split(":"); - List genotypeRecords = new ArrayList(); - index++; - String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(","); - for (String str : mHeader.getGenotypeSamples()) { - genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0))); - index++; - } - VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords); - // associate the genotypes with this new record - for (VCFGenotypeRecord gr : genotypeRecords) - gr.setVCFRecord(vrec); - return vrec; - - } - return new VCFRecord(values); - } catch (Exception e) { - throw new VCFParseException("VCF Reader failed to parsing, on line = " + line, e); - } - } - - /** - * generate a VCF genotype record, given it's format string, the genotype string, and allele info - * - * @param sampleName the sample name - * @param formatString the format string for this record, which contains the keys for the genotype parameters - * @param genotypeString contains the phasing information, allele information, and values for genotype parameters - * @param altAlleles the alternate allele string array, which we index into based on the field parameters - * @param referenceBase the reference base - * - * @return a VCFGenotypeRecord - */ - public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) { - return getVCFGenotype(sampleName, formatString.split(":"), genotypeString, altAlleles, referenceBase); - } - - /** - * generate a VCF genotype record, given it's format string, the genotype string, and allele info - * - * @param sampleName the sample name - * @param keyStrings the split format string for this record, which contains the keys for the genotype parameters - * @param genotypeString contains the phasing information, allele information, and values for genotype parameters - * @param altAlleles the alternate allele string array, which we index into based on the field parameters - * @param referenceBase the reference base - * - * @return a VCFGenotypeRecord - */ - public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) { - // parameters to create the VCF genotype record - HashMap tagToValue = new HashMap(); - VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN; - List bases = new ArrayList(); - - for (String key : keyStrings) { - String parse; - int nextDivider; - if (!genotypeString.contains(":")) { - nextDivider = genotypeString.length(); - parse = genotypeString; - } else { - nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":"); - parse = genotypeString.substring(0, nextDivider); - } - if (key.equals(VCFGenotypeRecord.GENOTYPE_KEY)) { - Matcher m = gtPattern.matcher(parse); - if (!m.matches()) - throw new RuntimeException("VCFReader: Unable to match GT genotype flag to it's expected pattern, the field was: " + parse); - phase = VCFGenotypeRecord.determinePhase(m.group(2)); - addAllele(m.group(1), altAlleles, referenceBase, bases); - if (m.group(3).length() > 0) addAllele(m.group(3), altAlleles, referenceBase, bases); - } else { - if ( parse.length() == 0 ) - parse = VCFGenotypeRecord.getMissingFieldValue(key); - tagToValue.put(key, parse); - } - if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1; - genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length()); - } - if ( bases.size() > 0 && bases.get(0).equals(VCFGenotypeRecord.EMPTY_ALLELE) ) - tagToValue.clear(); - // catch some common errors, either there are too many field keys or there are two many field values - else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0)) - throw new RuntimeException("VCFReader: genotype value count doesn't match the key count (expected " - + keyStrings.length + " but saw " + tagToValue.size() + ")"); - else if ( genotypeString.length() > 0 ) - throw new RuntimeException("VCFReader: genotype string contained additional unprocessed fields: " + genotypeString - + ". This most likely means that the format string is shorter then the value fields."); - - VCFGenotypeRecord rec = new VCFGenotypeRecord(sampleName, bases, phase); - for ( Map.Entry entry : tagToValue.entrySet() ) - rec.setField(entry.getKey(), entry.getValue()); - return rec; - } - - - /** - * add an alternate allele to the list of alleles we have for a VCF genotype record - * - * @param alleleNumber the allele number, as a string - * @param altAlleles the list of alternate alleles - * @param referenceBase the reference base - * @param bases the list of bases for this genotype call - */ - private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { - if (alleleNumber.equals(VCFGenotypeRecord.EMPTY_ALLELE)) { - bases.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE)); - } else { - int alleleValue = Integer.valueOf(alleleNumber); - // check to make sure the allele value is within bounds - if (alleleValue < 0 || alleleValue > altAlleles.length) - throw new IllegalArgumentException("VCFReader: the allele value of " + alleleValue + " is out of bounds given the alternate allele list."); - if (alleleValue == 0) - bases.add(new VCFGenotypeEncoding(String.valueOf(referenceBase))); - else - bases.add(new VCFGenotypeEncoding(altAlleles[alleleValue - 1])); - } - } /** @return get the header associated with this reader */ @@ -352,11 +101,12 @@ public class VCFReader implements Iterator, Iterable { } public void close() { - try { - mReader.close(); + if (vcfReader != null) try { + vcfReader.close(); } catch (IOException e) { - // we don't really care - Utils.warnUser("Unable to close VCF reader file, this is not fatal, but is worth noting"); + throw new StingException("Unable to close vcfReader",e); } + iterator = null; } + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java deleted file mode 100644 index bf5090d1b..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ /dev/null @@ -1,658 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - - -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.genotype.*; - -import java.util.*; - -/** - * the basic VCF record type - */ -public class VCFRecord { - - // standard info field keys - public static final String ANCESTRAL_ALLELE_KEY = "AA"; - public static final String ALLELE_COUNT_KEY = "AC"; - public static final String ALLELE_FREQUENCY_KEY = "AF"; - public static final String ALLELE_NUMBER_KEY = "AN"; - public static final String RMS_BASE_QUALITY_KEY = "BQ"; - public static final String DBSNP_KEY = "DB"; - public static final String DEPTH_KEY = "DP"; - public static final String HAPMAP2_KEY = "H2"; - public static final String HAPMAP3_KEY = "H3"; - public static final String RMS_MAPPING_QUALITY_KEY = "MQ"; - public static final String SAMPLE_NUMBER_KEY = "NS"; - public static final String STRAND_BIAS_KEY = "SB"; - - // commonly used strings that are in the standard - public static final String FORMAT_FIELD_SEPERATOR = ":"; - public static final String GENOTYPE_FIELD_SEPERATOR = ":"; - public static final String FIELD_SEPERATOR = "\t"; - public static final String FILTER_CODE_SEPERATOR = ";"; - public static final String INFO_FIELD_SEPERATOR = ";"; - - // default values - public static final String UNFILTERED = "."; - public static final String PASSES_FILTERS = "0"; - public static final String EMPTY_INFO_FIELD = "."; - public static final String EMPTY_ID_FIELD = "."; - public static final String EMPTY_ALLELE_FIELD = "."; - public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f"; - public static final int MISSING_GENOTYPE_QUALITY = -1; - - // the reference base - private String mReferenceBases; - // our location - private GenomeLoc mLoc; - // our id - private String mID; - // the alternate bases - private final List mAlts = new ArrayList(); - // our qual value - private double mQual; - // our filter string - private String mFilterString; - // our info fields -- use a TreeMap to ensure they can be pulled out in order (so it passes integration tests) - private final Map mInfoFields = new TreeMap(); - - private String mGenotypeFormatString; - - // our genotype sample fields - private final List mGenotypeRecords = new ArrayList(); - - /** - * given a reference base, a location, and the format string, create a VCF record. - * - * @param referenceBases the reference bases to use - * @param location our genomic location - * @param genotypeFormatString the format string - */ - public VCFRecord(String referenceBases, GenomeLoc location, String genotypeFormatString) { - setReferenceBase(referenceBases); - setLocation(location); - mGenotypeFormatString = genotypeFormatString; - } - - /** - * given the values for each of the columns, create a VCF record. - * - * @param columnValues a mapping of header strings to values - * @param genotypeFormatString the format string for the genotype records - * @param genotypeRecords the genotype records - */ - public VCFRecord(Map columnValues, String genotypeFormatString, List genotypeRecords) { - extractFields(columnValues); - mGenotypeRecords.addAll(genotypeRecords); - mGenotypeFormatString = genotypeFormatString; - } - - /** - * given the values for each of the columns, create a VCF record. - * - * @param columnValues a mapping of header strings to values - */ - public VCFRecord(Map columnValues) { - extractFields(columnValues); - mGenotypeFormatString = ""; - } - - /** - * create a VCF record - * - * @param referenceBases the reference bases to use - * @param contig the contig this variant is on - * @param position our position - * @param ID our ID string - * @param altBases the list of alternate bases - * @param qual the qual field - * @param filters the filters used on this variant - * @param infoFields the information fields - * @param genotypeFormatString the format string - * @param genotypeObjects the genotype objects - */ - public VCFRecord(String referenceBases, - String contig, - long position, - String ID, - List altBases, - double qual, - String filters, - Map infoFields, - String genotypeFormatString, - List genotypeObjects) { - setReferenceBase(referenceBases); - setLocation(contig, position); - this.mID = ID; - for (VCFGenotypeEncoding alt : altBases) - this.addAlternateBase(alt); - this.setQual(qual); - this.setFilterString(filters); - this.mInfoFields.putAll(infoFields); - this.mGenotypeFormatString = genotypeFormatString; - this.mGenotypeRecords.addAll(genotypeObjects); - } - - /** - * extract the field values from the passed in array - * - * @param columnValues a map of the header fields to values - */ - private void extractFields(Map columnValues) { - String chrom = null; - long position = -1; - - for (VCFHeader.HEADER_FIELDS val : columnValues.keySet()) { - switch (val) { - case CHROM: - chrom = columnValues.get(val); - break; - case POS: - position = Integer.valueOf(columnValues.get(val)); - break; - case ID: - setID(columnValues.get(val)); - break; - case REF: - if (columnValues.get(val).length() != 1) - throw new IllegalArgumentException("Reference base should be a single character"); - setReferenceBase(columnValues.get(val)); - break; - case ALT: - String values[] = columnValues.get(val).split(","); - for (String alt : values) - addAlternateBase(new VCFGenotypeEncoding(alt)); - break; - case QUAL: - setQual(Double.valueOf(columnValues.get(val))); - break; - case FILTER: - setFilterString(columnValues.get(val)); - break; - case INFO: - String vals[] = columnValues.get(val).split(";"); - for (String alt : vals) { - if ( alt.equals(EMPTY_INFO_FIELD) ) - continue; - String keyVal[] = alt.split("="); - if ( keyVal.length == 1 ) - addInfoField(keyVal[0], ""); - else if (keyVal.length == 2) - addInfoField(keyVal[0], keyVal[1]); - else - throw new IllegalArgumentException("info field key-value pair did not parse into key->value pair: " + alt); - } - break; - } - } - setLocation(chrom, position); - } - - /** - * do we have genotyping data - * - * @return true if we have genotyping data, false otherwise - */ - - public boolean hasGenotypeData() { - return (mGenotypeRecords.size() > 0); - } - - /** - * @return this VCF record's location - */ - public GenomeLoc getLocation() { - return mLoc; - } - - /** - * @return the ID value for this record - */ - public String getID() { - return mID == null ? EMPTY_ID_FIELD : mID; - } - - /** - * get the reference base - * - * @return either A, T, C, G, or N - */ - public String getReference() { - return mReferenceBases; - } - - /** - * get the alternate allele strings - * - * @return an array of strings representing the alt alleles, or null if there are none - */ - public List getAlternateAlleleList() { - ArrayList alts = new ArrayList(); - for ( VCFGenotypeEncoding alt : mAlts ) - alts.add(alt.getBases()); - return alts; - } - - public List getAlternateAlleles() { - return mAlts; - } - - public boolean hasAlternateAllele() { - for ( VCFGenotypeEncoding alt : mAlts ) { - if ( alt.getType() != VCFGenotypeEncoding.TYPE.UNCALLED ) - return true; - } - - return false; - } - - public boolean isBiallelic() { - return getAlternateAlleles().size() == 1; - } - - public boolean isReference() { - return !hasAlternateAllele(); - } - - public List getAlleleList() { - ArrayList list = new ArrayList(); - list.add(getReference()); - list.addAll(getAlternateAlleleList()); - return list; - } - - public double getNonRefAlleleFrequency() { - if ( mInfoFields.containsKey(ALLELE_FREQUENCY_KEY) ) { - return Double.valueOf(mInfoFields.get(ALLELE_FREQUENCY_KEY)); - } else { - // this is the poor man's AF - if ( mInfoFields.containsKey(ALLELE_COUNT_KEY) && mInfoFields.containsKey(ALLELE_NUMBER_KEY)) { - String splt[] = mInfoFields.get(ALLELE_COUNT_KEY).split(","); - if ( splt.length > 0 ) { - return (Double.valueOf(splt[0]) / Double.valueOf(mInfoFields.get(ALLELE_NUMBER_KEY))); - } - } - } - - return 0.0; - } - - public VCFGenotypeEncoding.TYPE getType() { - VCFGenotypeEncoding.TYPE type = mAlts.get(0).getType(); - for (int i = 1; i < mAlts.size(); i++) { - if ( mAlts.get(i).getType() != type ) - return VCFGenotypeEncoding.TYPE.MIXED; // if we have more than one type, return mixed - } - return type; - } - - public boolean isDeletion() { - return getType() == VCFGenotypeEncoding.TYPE.DELETION; - } - - public boolean isInsertion() { - return getType() == VCFGenotypeEncoding.TYPE.INSERTION; - } - - public boolean isIndel() { - return isDeletion() || isInsertion(); - } - - public boolean isSNP() { - return getType() == VCFGenotypeEncoding.TYPE.SINGLE_BASE; - } - - public boolean isNovel() { - return ( ! isInDBSNP() ) && ( ! isInHapmap() ); - } - - public boolean isInDBSNP() { - return ( ( mID != null && ! mID.equals(".") ) || ( mInfoFields.get(DBSNP_KEY) != null && mInfoFields.get(DBSNP_KEY).equals("1") ) ); - } - - public boolean isInHapmap() { - if ( mInfoFields.get(HAPMAP2_KEY) != null && mInfoFields.get(HAPMAP2_KEY).equals("1") ) { - return true; - } else { - return ( mInfoFields.get(HAPMAP3_KEY) != null && mInfoFields.get(HAPMAP3_KEY).equals("1") ); - } - } - - public char getAlternativeBaseForSNP() { - if ( !isSNP() && !isBiallelic() ) - throw new IllegalStateException("This record does not represent a SNP"); - return mAlts.get(0).getBases().charAt(0); - } - - public char getReferenceForSNP() { - if ( !isSNP() ) - throw new IllegalStateException("This record does not represent a SNP"); - return getReference().charAt(0); - } - - /** - * @return the phred-scaled quality score - */ - public double getQual() { - return mQual; - } - - public boolean isMissingQual() { - return (int)mQual == MISSING_GENOTYPE_QUALITY; - } - - /** - * @return the -log10PError - */ - public double getNegLog10PError() { - return mQual / 10.0; - } - - /** - * get the filter criteria - * - * @return an array of strings representing the filtering criteria, or UNFILTERED if none are applied - */ - public String[] getFilteringCodes() { - if (mFilterString == null) return new String[]{UNFILTERED}; - return mFilterString.split(FILTER_CODE_SEPERATOR); - } - - public boolean isFiltered() { - String[] codes = getFilteringCodes(); - return !codes[0].equals(UNFILTERED) && !codes[0].equals(PASSES_FILTERS); - } - -// public boolean hasFilteringCodes() { -// return mFilterString != null; -// } - - public String getFilterString() { - return mFilterString; - } - - /** - * get the information key-value pairs as a Map<> - * - * @return a map, of the info key-value pairs - */ - public final Map getInfoValues() { - return mInfoFields; - } - - public List getVCFGenotypeRecords() { - return mGenotypeRecords; - } - - /** - * @return a List of the sample names - */ - public String[] getSampleNames() { - String names[] = new String[mGenotypeRecords.size()]; - for (int i = 0; i < mGenotypeRecords.size(); i++) { - names[i] = mGenotypeRecords.get(i).getSampleName(); - } - return names; - } - - public VCFGenotypeRecord getGenotype(final String sampleName) { - for ( VCFGenotypeRecord rec : getVCFGenotypeRecords() ) { - if ( rec.getSampleName().equals(sampleName) ) { - return rec; - } - } - - return null; - } - - public String getGenotypeFormatString() { - return mGenotypeFormatString; - }// the formatting string for our genotype records - - public void setGenotypeFormatString(String newFormatString) { - mGenotypeFormatString = newFormatString; - } - - public void setReferenceBase(String reference) { - mReferenceBases = reference.toUpperCase(); - } - - public void setLocation(String chrom, long position) { - if ( chrom == null ) - throw new IllegalArgumentException("Chromosomes cannot be missing"); - if ( position < 0 ) - throw new IllegalArgumentException("Position values must be greater than 0"); - mLoc = GenomeLocParser.createGenomeLoc(chrom, position); - } - - public void setLocation(GenomeLoc location) { - mLoc = location.clone(); - } - - public void setID(String ID) { - mID = ID; - } - - public void setQual(double qual) { - if ( qual < 0 && (int)qual != MISSING_GENOTYPE_QUALITY ) - throw new IllegalArgumentException("Qual values cannot be negative unless they are " + MISSING_GENOTYPE_QUALITY + " ('unknown')"); - mQual = qual; - } - - public void setFilterString(String filterString) { - mFilterString = filterString; - } - - public void addGenotypeRecord(VCFGenotypeRecord mGenotypeRecord) { - mGenotypeRecords.add(mGenotypeRecord); - } - - public void setGenotypeRecords(List records) { - mGenotypeRecords.clear(); - for ( VCFGenotypeRecord g : records ) - addGenotypeRecord(g); - } - - /** - * add an alternate base to our alternate base list. All bases are uppercased - * before being added to the list. - * - * @param base the base to add - */ - public void addAlternateBase(VCFGenotypeEncoding base) { - if (!mAlts.contains(base)) mAlts.add(base); - } - - public void setAlternateBases(List bases) { - mAlts.clear(); - for ( VCFGenotypeEncoding e : bases ) - addAlternateBase(e); - } - - /** - * add an info field to the record - * - * @param key the key, from the spec or a user created key - * @param value it's value as a string - */ - public void addInfoField(String key, String value) { - //System.out.printf("Adding info field %s=%s%n", key, value); - mInfoFields.put(key, value); - } - - public void printInfoFields() { - for ( Map.Entry e : mInfoFields.entrySet() ) { - System.out.printf(" Current info field %s=%s this=%s%n", e.getKey(), e.getValue(), this); - } - } - - - /** - * add an info field to the record - * - * @param m A map from info keys to info values - */ - public void addInfoFields(Map m) { - for ( Map.Entry e : m.entrySet() ) - addInfoField(e.getKey(), e.getValue()); - } - - /** - * the generation of a string representation, which is used by the VCF writer - * - * @param header the VCF header for this VCF Record - * @return a string - */ - public String toStringEncoding(VCFHeader header) { - return toStringEncoding(header, VCFGenotypeWriter.VALIDATION_STRINGENCY.STRICT); - } - - /** - * the generation of a string representation, which is used by the VCF writer - * - * @param header the VCF header for this VCF Record - * @param validationStringency the validation stringency - * @return a string - */ - public String toStringEncoding(VCFHeader header, VCFGenotypeWriter.VALIDATION_STRINGENCY validationStringency) { - StringBuilder builder = new StringBuilder(); - - // CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO - builder.append(mLoc.getContig()); - builder.append(FIELD_SEPERATOR); - builder.append(mLoc.getStart()); - builder.append(FIELD_SEPERATOR); - builder.append(getID()); - builder.append(FIELD_SEPERATOR); - builder.append(getReference()); - builder.append(FIELD_SEPERATOR); - List alts = getAlternateAlleles(); - if ( alts.size() > 0 ) { - builder.append(alts.get(0)); - for ( int i = 1; i < alts.size(); i++ ) { - builder.append(","); - builder.append(alts.get(i)); - } - } else { - builder.append(EMPTY_ALLELE_FIELD); - } - builder.append(FIELD_SEPERATOR); - if ( (int)mQual == MISSING_GENOTYPE_QUALITY ) - builder.append(MISSING_GENOTYPE_QUALITY); - else - builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING, mQual)); - builder.append(FIELD_SEPERATOR); - builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes())); - builder.append(FIELD_SEPERATOR); - builder.append(createInfoString()); - - if ( mGenotypeFormatString != null && mGenotypeFormatString.length() > 0 ) { -// try { - addGenotypeData(builder, header); -// } catch (Exception e) { -// if ( validationStringency == VCFGenotypeWriter.VALIDATION_STRINGENCY.STRICT ) -// throw new RuntimeException(e); -// } - } - - return builder.toString(); - } - - /** - * create the info string - * - * @return a string representing the infomation fields - */ - protected String createInfoString() { - StringBuffer info = new StringBuffer(); - boolean isFirst = true; - for (String str : mInfoFields.keySet()) { - if ( isFirst ) - isFirst = false; - else - info.append(INFO_FIELD_SEPERATOR); - info.append(str); - info.append("="); - info.append(mInfoFields.get(str)); - } - return info.length() == 0 ? EMPTY_INFO_FIELD : info.toString(); - } - - /** - * add the genotype data - * - * @param builder the string builder - * @param header the header object - */ - private void addGenotypeData(StringBuilder builder, VCFHeader header) { - Map gMap = genotypeListToMap(getVCFGenotypeRecords()); - - StringBuffer tempStr = new StringBuffer(); - if ( header.getGenotypeSamples().size() < getVCFGenotypeRecords().size() ) { - for ( String sample : gMap.keySet() ) { - if ( !header.getGenotypeSamples().contains(sample) ) - System.err.println("Sample " + sample + " is a duplicate or is otherwise not present in the header"); - else - header.getGenotypeSamples().remove(sample); - } - throw new IllegalStateException("We have more genotype samples than the header specified; please check that samples aren't duplicated"); - } - tempStr.append(FIELD_SEPERATOR + mGenotypeFormatString); - - String[] genotypeFormatStrings = mGenotypeFormatString.split(":"); - - for ( String genotype : header.getGenotypeSamples() ) { - tempStr.append(FIELD_SEPERATOR); - if ( gMap.containsKey(genotype) ) { - VCFGenotypeRecord rec = gMap.get(genotype); - tempStr.append(rec.toStringEncoding(mAlts, genotypeFormatStrings)); - gMap.remove(genotype); - } else { - tempStr.append(VCFGenotypeRecord.stringEncodingForEmptyGenotype(genotypeFormatStrings)); - } - } - if ( gMap.size() != 0 ) { - for ( String sample : gMap.keySet() ) - System.err.println("Sample " + sample + " is being genotyped but isn't in the header."); - throw new IllegalStateException("We failed to use all the genotype samples; there must be an inconsistancy between the header and records"); - } - - builder.append(tempStr); - } - - /** - * compare two VCF records - * - * @param other the other VCF record - * @return true if they're equal - */ - public boolean equals(VCFRecord other) { - if (!this.mAlts.equals(other.mAlts)) return false; - if (!this.mReferenceBases.equals(other.mReferenceBases)) return false; - if (!this.mLoc.equals(other.mLoc)) return false; - if (!this.mID.equals(other.mID)) return false; - if (this.mQual != other.mQual) return false; - if ( this.mFilterString == null ) { - if ( other.mFilterString != null ) return false; - } else if ( !this.mFilterString.equals(other.mFilterString) ) return false; - if (!this.mInfoFields.equals(other.mInfoFields)) return false; - if (!this.mGenotypeRecords.equals(other.mGenotypeRecords)) return false; - return true; - } - - /** - * create a genotype mapping from a list and their sample names - * - * @param list a list of genotype samples - * @return a mapping of the sample name to VCF genotype record - */ - private static Map genotypeListToMap(List list) { - Map map = new HashMap(); - for (int i = 0; i < list.size(); i++) { - VCFGenotypeRecord rec = list.get(i); - map.put(rec.getSampleName(), rec); - } - return map; - } - -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index b43cb7a11..6e7963f23 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -25,10 +25,11 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.RodVCF; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.Utils; @@ -48,7 +49,7 @@ public class VCFUtils { for ( ReferenceOrderedDataSource source : toolkit.getRodDataSources() ) { RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getType().equals(RodVCF.class) ) { + if ( rod.getType().equals(VCFRecord.class) ) { vcfs.add(rod); } } @@ -72,7 +73,7 @@ public class VCFUtils { List dataSources = toolkit.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getType().equals(RodVCF.class) ) { + if ( rod.getType().equals(VCFCodec.class) ) { VCFReader reader = new VCFReader(rod.getFile()); fields.addAll(reader.getHeader().getMetaData()); reader.close(); @@ -89,7 +90,7 @@ public class VCFUtils { * @param rodNamesToSampleNames mapping of rod/sample pairs to new uniquified sample names * @return the new merged vcf record */ - public static VCFRecord mergeRecords(List rods, Map, String> rodNamesToSampleNames) { + public static VCFRecord mergeRecords(Map rods, Map, String> rodNamesToSampleNames) { VCFParameters params = new VCFParameters(); params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY); @@ -100,14 +101,14 @@ public class VCFUtils { Map infoFields = new HashMap(); List filters = new ArrayList(); - for ( RodVCF rod : rods ) { + for ( VCFRecord rod : rods.keySet() ) { List myGenotypes = rod.getVCFGenotypeRecords(); for ( VCFGenotypeRecord call : myGenotypes ) { // set the name to be the new uniquified name and add it to the list of genotypes - call.setSampleName(rodNamesToSampleNames.get(new Pair(rod.getName(), call.getSampleName()))); + call.setSampleName(rodNamesToSampleNames.get(new Pair(rods.get(rod), call.getSampleName()))); if ( params.getPosition() < 1 ) - params.setLocations(rod.getLocation(), call.getReference()); - params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord)); + params.setLocations(GenomeLocParser.createGenomeLoc(rod.getChr(), rod.getStart()), call.getReference()); + params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod)); } // set the overall confidence to be the max entry we see diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index b6c2e56da..f2deeae47 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -1,6 +1,10 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFRecord; + import java.io.*; import java.util.TreeSet; @@ -97,7 +101,7 @@ public class VCFWriter { if ( mHeader == null ) throw new IllegalStateException("The VCF Header must be written before records can be added"); - String vcfString = record.toStringEncoding(mHeader, validationStringency); + String vcfString = record.toStringEncoding(mHeader); try { mWriter.write(vcfString + "\n"); mWriter.flush(); // necessary so that writing to an output stream will work diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFUnitTest.java deleted file mode 100755 index f73d850bb..000000000 --- a/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFUnitTest.java +++ /dev/null @@ -1,181 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; -import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.junit.Assert; -import static org.junit.Assert.fail; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - - -/** - * @author aaron - *

- * Class RodVCFUnitTest - *

- * test out the rod VCF - */ -public class RodVCFUnitTest extends BaseTest { - - private static File vcfFile = new File(validationDataLocation + "vcfexample.vcf"); - private VCFHeader mHeader; - @BeforeClass - public static void beforeTests() { - IndexedFastaSequenceFile seq; - try { - seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta")); - } catch (FileNotFoundException e) { - throw new StingException("unable to load the sequence dictionary"); - } - GenomeLocParser.setupRefContigOrdering(seq); - - } - - private RodVCF getVCFObject() { - RodVCF vcf = new RodVCF("VCF"); - mHeader = null; - try { - mHeader = (VCFHeader) vcf.initialize(vcfFile); - } catch (FileNotFoundException e) { - fail("Unable to open VCF file"); - } - mHeader.checkVCFVersion(); - return vcf; - } - - @Test - public void testInitialize() { - getVCFObject(); - } - - @Test - public void testToIterator() { - RodVCF vcf = getVCFObject(); - VCFReader reader = new VCFReader(vcfFile); - Iterator iter = vcf.createIterator("VCF", vcfFile); - while (iter.hasNext()) { - VCFRecord rec1 = reader.next(); - VCFRecord rec2 = iter.next().mCurrentRecord; - if (!rec1.equals(rec2)) { - fail("VCF record rec1 != rec2"); - } - } - } - - @Test - public void testToMAF() { - RodVCF vcf = getVCFObject(); - Iterator iter = vcf.createIterator("VCF", vcfFile); - RodVCF rec = iter.next(); - Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.786, 0.00001); - rec = iter.next(); - rec = iter.next(); - rec = iter.next(); - Assert.assertEquals(rec.getNonRefAlleleFrequency(), 0.0, 0.00001); - } - - @Test - public void testToString() { - // slightly altered line, due to map ordering - final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tAF=0.786;DP=258;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:\n"; - RodVCF vcf = getVCFObject(); - VCFReader reader = new VCFReader(vcfFile); - Iterator iter = vcf.createIterator("VCF", vcfFile); - boolean first = true; - while (iter.hasNext()) { - VCFRecord rec1 = reader.next(); - VCFRecord rec2 = iter.next().mCurrentRecord; - if (!rec1.toStringEncoding(mHeader).equals(rec2.toStringEncoding(mHeader))) { - fail("VCF record rec1.toStringEncoding() != rec2.toStringEncoding()"); - } - // verify the first line too - if (first) { - if (!firstLine.equals(rec1.toStringEncoding(mHeader) + "\n")) { - fail("VCF record rec1.toStringEncoding() != expected string :\n" + rec1.toStringEncoding(mHeader) + "\n" + firstLine); - } - first = false; - } - } - } - - @Test - public void testInsertion() { - RodVCF vcf = getVCFObject(); - Iterator iter = vcf.createIterator("VCF", vcfFile); - RodVCF rec = iter.next(); - Assert.assertTrue(rec.isSNP()); - rec = iter.next(); - rec = iter.next(); - rec = iter.next(); - rec = iter.next(); - Assert.assertTrue(rec.isIndel()); - Assert.assertTrue(rec.isDeletion()); - } - - @Test - public void testDeletion() { - RodVCF vcf = getVCFObject(); - Iterator iter = vcf.createIterator("VCF", vcfFile); - RodVCF rec = iter.next(); - Assert.assertTrue(rec.isSNP()); - rec = iter.next(); - rec = iter.next(); - rec = iter.next(); - rec = iter.next(); - rec = iter.next(); - - Assert.assertTrue(rec.isIndel()); - Assert.assertTrue(rec.isInsertion()); - } - @Test - public void testGetGenotypes() { - RodVCF vcf = getVCFObject(); - Iterator iter = vcf.createIterator("VCF", vcfFile); - RodVCF rec = iter.next(); - // we should get back a ref 'G' and an alt 'A' - List list = rec.getVCFGenotypeRecords(); - List knowngenotypes = new ArrayList(); - knowngenotypes.add("GG"); - knowngenotypes.add("AG"); - knowngenotypes.add("AA"); - Assert.assertEquals(3, list.size()); - for (VCFGenotypeRecord g: list) { - Assert.assertTrue(knowngenotypes.contains(g.getBases())); - } - } - @Test - public void testGetGenotypesQual() { - RodVCF vcf = getVCFObject(); - Iterator iter = vcf.createIterator("VCF", vcfFile); - RodVCF rec = iter.next(); - // we should get back a ref 'G' and an alt 'A' - List list = rec.getVCFGenotypeRecords(); - Assert.assertEquals(4.8,list.get(0).getNegLog10PError(),0.0001); - Assert.assertEquals(4.8,list.get(1).getNegLog10PError(),0.0001); - Assert.assertEquals(4.3,list.get(2).getNegLog10PError(),0.0001); - } - @Test - public void testGetLocation() { - RodVCF vcf = getVCFObject(); - Iterator iter = vcf.createIterator("VCF", vcfFile); - RodVCF rec = iter.next(); - GenomeLoc loc = rec.getLocation(); - Assert.assertEquals(14370,loc.getStart()); - Assert.assertEquals(14370,loc.getStop()); - Assert.assertTrue(loc.getContig().equals("20")); - } - -} diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java index c8e8e8a0b..8893f17bb 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java @@ -27,7 +27,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest { public void testSimpleVCFPileup() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString1KG() + " -B eval,VCF," + validationDataLocation + "MultiSample.vcf", 1, - Arrays.asList("ff4da30a3b6428bc64d61214c715efa5")); + Arrays.asList("5d84c75746738833b6c9441d9d614553")); executeTest("testSimpleVCFPileup", spec); } @@ -37,7 +37,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest { baseTestString1KG() + " -B eval,VCF," + validationDataLocation + "MultiSample.vcf" + " -B eval,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf" , 1, - Arrays.asList("52f758a70ae603213d4399220151357d")); + Arrays.asList("6dd0ed0a6fe7096ccb66beffb8d455da")); executeTest("testComplexVCFPileup", spec); } @@ -49,7 +49,7 @@ public class RodSystemValidationIntegrationTest extends WalkerTest { " -B eval,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf" + " -L 1 -L 2 -L 20" , 1, - Arrays.asList("421a27be91496e88f2f4e197dcfad0ff")); + Arrays.asList("ab3da32eae65e8c15a9f4a787a190a37")); executeTest("testLargeComplexVCFPileup", spec); } } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncodingUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncodingUnitTest.java deleted file mode 100644 index 4314287d4..000000000 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncodingUnitTest.java +++ /dev/null @@ -1,151 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broadinstitute.sting.BaseTest; -import org.junit.Assert; -import org.junit.Test; - - -/** - * @author aaron - *

- * Class VCFGenotypeEncodingUnitTest - *

- * test the VCFGenotypeEncoding class - */ -public class VCFGenotypeEncodingUnitTest extends BaseTest { - @Test - public void testDecodingSingle() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A"); - Assert.assertTrue("A".equals(enc.toString())); - Assert.assertEquals(0, enc.getLength()); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); - - VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("C"); - Assert.assertTrue("C".equals(enc2.toString())); - Assert.assertEquals(0, enc.getLength()); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); - - VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("G"); - Assert.assertTrue("G".equals(enc3.toString())); - Assert.assertEquals(0, enc.getLength()); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); - - VCFGenotypeEncoding enc4 = new VCFGenotypeEncoding("T"); - Assert.assertTrue("T".equals(enc4.toString())); - Assert.assertEquals(0, enc.getLength()); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); - - VCFGenotypeEncoding enc5 = new VCFGenotypeEncoding("a"); - Assert.assertTrue("A".equals(enc5.toString())); - Assert.assertEquals(0, enc.getLength()); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); - - VCFGenotypeEncoding enc6 = new VCFGenotypeEncoding("c"); - Assert.assertTrue("C".equals(enc6.toString())); - Assert.assertEquals(0, enc.getLength()); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); - - VCFGenotypeEncoding enc7 = new VCFGenotypeEncoding("g"); - Assert.assertTrue("G".equals(enc7.toString())); - Assert.assertEquals(0, enc.getLength()); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); - - VCFGenotypeEncoding enc8 = new VCFGenotypeEncoding("t"); - Assert.assertTrue("T".equals(enc8.toString())); - Assert.assertEquals(0, enc.getLength()); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); - } - - @Test(expected = IllegalArgumentException.class) - public void testDecodingSingleBadBase() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("E"); - } - - @Test(expected = IllegalArgumentException.class) - public void testDecodingSingleWrongBase() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("I"); - } - - @Test - public void testValidIndel() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGGC"); - Assert.assertEquals(4, enc.getLength()); - Assert.assertTrue(enc.getBases().equals("AGGC")); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.INSERTION, enc.getType()); - } - - @Test(expected = IllegalArgumentException.class) - public void testBadIndel() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGRC"); - } - - @Test - public void testValidDel() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("D40"); - Assert.assertEquals(40, enc.getLength()); - Assert.assertTrue(enc.getBases().equals("")); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.DELETION, enc.getType()); - } - - @Test(expected = IllegalArgumentException.class) - public void testBadDel() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("DAGCT"); - } - - @Test - public void testValidNoCall() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("."); - Assert.assertEquals(0, enc.getLength()); - Assert.assertTrue(enc.getBases().equals(".")); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.UNCALLED, enc.getType()); - } - - @Test(expected = IllegalArgumentException.class) - public void testBadNoCall() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding(".."); - } - - @Test - public void testEquals() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A"); - VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A"); - VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C"); - Assert.assertTrue(enc.equals(enc2)); - Assert.assertTrue(!enc.equals(enc3)); - enc = new VCFGenotypeEncoding("D40"); - enc2 = new VCFGenotypeEncoding("D40"); - enc3 = new VCFGenotypeEncoding("D41"); - Assert.assertTrue(enc.equals(enc2)); - Assert.assertTrue(!enc.equals(enc3)); - enc = new VCFGenotypeEncoding("IAAC"); - enc2 = new VCFGenotypeEncoding("IAAC"); - enc3 = new VCFGenotypeEncoding("IACG"); - Assert.assertTrue(enc.equals(enc2)); - Assert.assertTrue(!enc.equals(enc3)); - enc = new VCFGenotypeEncoding("."); - enc2 = new VCFGenotypeEncoding("."); - Assert.assertTrue(enc.equals(enc2)); - } - - @Test - public void testHashCode() { - VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A"); - VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A"); - VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C"); - Assert.assertTrue(enc.hashCode() == enc2.hashCode()); - Assert.assertTrue(enc.hashCode() != enc3.hashCode()); - enc = new VCFGenotypeEncoding("D40"); - enc2 = new VCFGenotypeEncoding("D40"); - enc3 = new VCFGenotypeEncoding("D41"); - Assert.assertTrue(enc.hashCode() == enc2.hashCode()); - Assert.assertTrue(enc.hashCode() != enc3.hashCode()); - enc = new VCFGenotypeEncoding("IAAC"); - enc2 = new VCFGenotypeEncoding("IAAC"); - enc3 = new VCFGenotypeEncoding("IACG"); - Assert.assertTrue(enc.hashCode() == enc2.hashCode()); - Assert.assertTrue(enc.hashCode() != enc3.hashCode()); - enc = new VCFGenotypeEncoding("."); - enc2 = new VCFGenotypeEncoding("."); - Assert.assertTrue(enc.hashCode() == enc2.hashCode()); - } -} diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java deleted file mode 100644 index ca5a340fb..000000000 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broadinstitute.sting.BaseTest; -import org.junit.Assert; -import org.junit.Test; - -import java.util.*; - - -/** - * - * @author aaron - * - * Class VCFHeaderUnitTest - * - * Test the VCF Header class - */ -public class VCFHeaderUnitTest extends BaseTest { - - private Set metaData = new HashSet(); - private Set additionalColumns = new HashSet(); - - /** - * give it fake data, and make sure we get back the right fake data - */ - @Test - public void testHeaderConstructor() { - metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION)); - metaData.add(new VCFHeaderLine("two", "2")); - additionalColumns.add("extra1"); - additionalColumns.add("extra2"); - // this should create a header that is valid - - VCFHeader header = new VCFHeader(metaData, additionalColumns); - - // check the fields - int index = 0; - for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) { - Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index],field); - index++; - } - Assert.assertEquals(metaData.size(), header.getMetaData().size()); - index = 0; - for (String key: header.getGenotypeSamples()) { - Assert.assertTrue(additionalColumns.contains(key)); - index++; - } - Assert.assertEquals(additionalColumns.size(),index); - } - - -} diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFIntegrationTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFIntegrationTest.java deleted file mode 100755 index 37aa3334e..000000000 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFIntegrationTest.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broadinstitute.sting.WalkerTest; -import org.junit.Test; - -import java.util.Arrays; - -public class VCFIntegrationTest extends WalkerTest { - - @Test - public void test1() { - // Read in and then emit each record - WalkerTestSpec spec = new WalkerTestSpec( - "-T PrintRODs -R " + oneKGLocation + "reference/human_b36_both.fasta -L 1:10,000,000-10,050,000 -o %s -B vcf,VCF," + validationDataLocation + "complexExample.vcf", 1, - Arrays.asList("68b123acca4975553297fcd776c70464")); - executeTest("test vcf", spec); - } -} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderUnitTest.java deleted file mode 100644 index c419f49e0..000000000 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderUnitTest.java +++ /dev/null @@ -1,366 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.refdata.RodVCF; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -/** test the VCFReader class test */ -public class VCFReaderUnitTest extends BaseTest { - - private static final File vcfFile = new File(validationDataLocation + "vcfexample.vcf"); - private static final File multiSampleVCF = new File(validationDataLocation + "MultiSample.vcf"); - private static final String VCF_MIXUP_FILE = validationDataLocation + "mixedup.v2.vcf"; - private static final File complexFile = new File(validationDataLocation + "complexExample.vcf"); - private static final File headerNoRecordsFile = new File(validationDataLocation + "headerNoRecords.vcf"); - private static final File headerSampleSpaceFile = new File(validationDataLocation + "headerSampleSpaceFile.vcf"); - - - @BeforeClass - public static void beforeTests() { - try { - IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta")); - GenomeLocParser.setupRefContigOrdering(seq); - } catch (FileNotFoundException e) { - throw new StingException("unable to load the sequence dictionary"); - } - } - - @Test - public void testVCFInput() { - VCFReader reader = new VCFReader(vcfFile); - int counter = 0; - while (reader.hasNext()) { - counter++; - reader.next(); - } - Assert.assertEquals(6, counter); - } - - @Test - public void testMultiSampleVCFInput() { - VCFReader reader = new VCFReader(multiSampleVCF); - int counter = 0; - while (reader.hasNext()) { - counter++; - reader.next(); - } - Assert.assertEquals(99, counter); - } - - @Test - public void testNoCallSites() { - VCFReader reader = new VCFReader(multiSampleVCF); - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - VCFRecord rec = reader.next(); - final int numberOfNoCallsTruth = 9; - int noCalls = 0; - for (VCFGenotypeRecord record : rec.getVCFGenotypeRecords()) { - List encodings = record.getAlleles(); - if (encodings.get(0).getType() == VCFGenotypeEncoding.TYPE.UNCALLED && - encodings.get(1).getType() == VCFGenotypeEncoding.TYPE.UNCALLED) - noCalls++; - } - Assert.assertEquals(numberOfNoCallsTruth, noCalls); - } - - - @Test - public void testKnownCallSites() { - VCFReader reader = new VCFReader(multiSampleVCF); - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - VCFRecord rec = reader.next(); - boolean seenNA11992 = false; - boolean seenNA12287 = false; - for (VCFGenotypeRecord record : rec.getVCFGenotypeRecords()) { - if (record.getSampleName().equals("NA11992")) { - List encodings = record.getAlleles(); - if (!encodings.get(0).getBases().equals("A") || - !encodings.get(1).getBases().equals("A")) { - Assert.fail("Sample NA11992 at site 1:10000005 should be AA"); - } - seenNA11992 = true; - } - if (record.getSampleName().equals("NA12287")) { - List encodings = record.getAlleles(); - if (!encodings.get(0).getBases().equals("A") || - !encodings.get(1).getBases().equals("T")) { - Assert.fail("Sample NA11992 at site 1:10000005 should be AA"); - } - seenNA12287 = true; - } - } - Assert.assertTrue(seenNA11992 && seenNA12287); - } - - /** test the basic parsing */ - @Test - public void testBasicParsing() { - String formatString = "GT:B:C:D"; - String genotypeString = "0|1:2:3:4"; - String altAlleles[] = {"A", "G", "T"}; - char referenceBase = 'C'; - VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test", formatString, genotypeString, altAlleles, referenceBase); - Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED, rec.getPhaseType()); - Assert.assertEquals("C", rec.getAlleles().get(0).toString()); - Assert.assertEquals("A", rec.getAlleles().get(1).toString()); - Map values = rec.getFields(); - Assert.assertEquals(3, values.size()); - Assert.assertTrue(values.get("B").equals("2")); - Assert.assertTrue(values.get("C").equals("3")); - Assert.assertTrue(values.get("D").equals("4")); - } - - - /** test the parsing of a genotype field with missing parameters */ - @Test - public void testMissingFieldParsing() { - String formatString = "GT:B:C:D"; - String genotypeString = "0|1:::4"; - String altAlleles[] = {"A", "G", "T"}; - char referenceBase = 'C'; - VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test", formatString, genotypeString, altAlleles, referenceBase); - Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED, rec.getPhaseType()); - Assert.assertEquals("C", rec.getAlleles().get(0).toString()); - Assert.assertEquals("A", rec.getAlleles().get(1).toString()); - Map values = rec.getFields(); - Assert.assertEquals(3, values.size()); - Assert.assertTrue(values.get("B").equals("")); - Assert.assertTrue(values.get("C").equals("")); - Assert.assertTrue(values.get("D").equals("4")); - } - - /** test the parsing of a genotype field with different missing parameters */ - @Test - public void testMissingAllFields() { - String formatString = "GT:B:C:D"; - String genotypeString = "0|1:::"; - String altAlleles[] = {"A", "G", "T"}; - char referenceBase = 'C'; - VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test", formatString, genotypeString, altAlleles, referenceBase); - Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED, rec.getPhaseType()); - Assert.assertEquals("C", rec.getAlleles().get(0).toString()); - Assert.assertEquals("A", rec.getAlleles().get(1).toString()); - Map values = rec.getFields(); - Assert.assertEquals(3, values.size()); - Assert.assertTrue(values.get("B").equals("")); - Assert.assertTrue(values.get("C").equals("")); - Assert.assertTrue(values.get("D").equals("")); - } - - @Test - public void testKiransOutOfOrderExample() { - ArrayList header = new ArrayList(); - ArrayList variant = new ArrayList(); - - try { - FileReader reader = new FileReader(VCF_MIXUP_FILE); - BufferedReader breader = new BufferedReader(reader); - String line; - while ((line = breader.readLine()) != null) { - String[] pieces = line.split("\t"); - - if (line.contains("##")) { - continue; - } else if (line.contains("#CHROM")) { - for (int i = 0; i < pieces.length; i++) { - header.add(i, pieces[i]); - } - } else { - for (int i = 0; i < pieces.length; i++) { - variant.add(i, pieces[i]); - } - } - } - } catch (FileNotFoundException e) { - Assert.fail("Unable to open the mixed up VCF file."); - } catch (IOException e) { - Assert.fail("Unable to read from the mixed up VCF file."); - } - - // now load up a ROD - Iterator rod = RodVCF.createIterator("name",new File(VCF_MIXUP_FILE)); - RodVCF newRod = rod.next(); - List records = newRod.mCurrentRecord.getVCFGenotypeRecords(); - for (VCFGenotypeRecord record: records) { - if (!header.contains(record.getSampleName())) { - Assert.fail("Parsed header doesn't contain field " + record.getSampleName()); - } - if (!variant.get(header.indexOf(record.getSampleName())).equals(record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":")))) { - Assert.fail("Parsed value for " + record.getSampleName() + " doesn't contain the same value ( " + record.toStringEncoding(newRod.mCurrentRecord.getAlternateAlleles(), newRod.mCurrentRecord.getGenotypeFormatString().split(":")) - + " != " + variant.get(header.indexOf(record.getSampleName()))); - } - } - } - - @Test - public void testComplexExample() { - VCFReader reader = new VCFReader(complexFile); - - // test header - Set headerLines = reader.getHeader().getMetaData(); - int formatCount = 0; - int infoCount = 0; - int filterCount = 0; - for ( VCFHeaderLine line : headerLines ) { - if ( line instanceof VCFFormatHeaderLine ) - formatCount++; - else if ( line instanceof VCFInfoHeaderLine ) - infoCount++; - else if ( line instanceof VCFFilterHeaderLine ) - filterCount++; - } - Assert.assertEquals(3, formatCount); - Assert.assertEquals(4, infoCount); - Assert.assertEquals(0, filterCount); - - // record #1: test dbsnp id - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - VCFRecord rec = reader.next(); - Assert.assertTrue(rec.getID().equals("testid1")); - List grecords = rec.getVCFGenotypeRecords(); - int counter = 0; - for ( VCFGenotypeRecord grec : grecords ) { - if ( grec.isEmptyGenotype() ) - counter++; - } - Assert.assertEquals(2, counter); - - // record #2: test info field - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - Assert.assertTrue(rec.getInfoValues().get("RMSMAPQ").equals("81.91")); - - // record #3: test alternate alleles - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - List alts = rec.getAlternateAlleleList(); - Assert.assertTrue((alts.get(0).equals("T") && alts.get(1).equals("C")) || (alts.get(0).equals("C") && alts.get(1).equals("T"))); - - // record #4: test max GQ value and read count - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - grecords = rec.getVCFGenotypeRecords(); - for ( VCFGenotypeRecord grec : grecords ) { - if ( !grec.isEmptyGenotype() ) { - Assert.assertTrue(grec.getFields().get("GQ").equals("2000000")); - Assert.assertEquals(7, grec.getReadCount()); - } - } - - // record #5: test more alternate alleles - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - alts = rec.getAlternateAlleleList(); - Assert.assertEquals(alts.size(), 3); - grecords = rec.getVCFGenotypeRecords(); - boolean[] seen = new boolean[3]; - for ( VCFGenotypeRecord grec : grecords ) { - if ( grec.getBases().equals("CC") ) { - Assert.assertEquals(27, grec.getReadCount()); - seen[0] = true; - } else if ( grec.getBases().equals("AA") ) { - Assert.assertEquals(26, grec.getReadCount()); - seen[1] = true; - } else if ( grec.getBases().equals("GT") || grec.getBases().equals("TG") ) { - Assert.assertEquals(16, grec.getReadCount()); - seen[2] = true; - } - } - Assert.assertTrue(seen[0] && seen[1] && seen[2]); - - // record #6: toVariation - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - grecords = rec.getVCFGenotypeRecords(); - for ( VCFGenotypeRecord grec : grecords ) { - if ( !grec.isEmptyGenotype() ) { - Assert.assertTrue(grec.isVariant(rec.getReference().charAt(0))); - Assert.assertEquals(rec, grec.getRecord()); - } - } - - // record #7: default values, isVariation, and isHom/isHet - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - grecords = rec.getVCFGenotypeRecords(); - for ( VCFGenotypeRecord grec : grecords ) { - if ( !grec.isVariant(rec.getReference().charAt(0)) ) { - Assert.assertTrue(grec.isHom()); - Assert.assertTrue(grec.getFields().get("GQ").equals("-1")); - Assert.assertEquals(-1, grec.getReadCount()); - } else { - Assert.assertTrue(grec.isHet()); - if ( grec.getReadCount() == 4 ) - Assert.assertTrue(grec.getFields().get("GQ").equals("-1")); - else - Assert.assertTrue(grec.getFields().get("GQ").equals("5.85") && grec.getReadCount() == -1); - } - } - - // record #8: filtered and type - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - Assert.assertTrue(!rec.isFiltered()); - Assert.assertTrue(rec.getFilterString().equals(".")); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, rec.getType()); - - // record #9: deletion - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.DELETION, rec.getType()); - Assert.assertEquals(1, rec.getAlternateAlleleList().size()); - Assert.assertTrue(rec.getAlternateAlleleList().get(0).equals("")); - - // record #10: insertion - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - Assert.assertEquals(VCFGenotypeEncoding.TYPE.INSERTION, rec.getType()); - Assert.assertEquals(rec.getAlternateAlleleList().size(), 1); - Assert.assertTrue(rec.getAlternateAlleleList().get(0).equals("CAT")); - - // record #11: more filters - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - Assert.assertTrue(rec.isFiltered()); - Assert.assertTrue(rec.getFilterString().equals("foo")); - - // record #12: yet more filters - if (!reader.hasNext()) Assert.fail("The reader should have a record"); - rec = reader.next(); - Assert.assertTrue(rec.isFiltered()); - Assert.assertTrue(rec.getFilterString().equals("bar;baz")); - - if (reader.hasNext()) Assert.fail("The reader should NOT have a record"); - } - - @Test - public void testHeaderNoRecords() { - VCFReader reader = new VCFReader(headerNoRecordsFile); - Assert.assertTrue(reader.getHeader().getMetaData() != null); - Assert.assertTrue(!reader.iterator().hasNext()); - } - - @Test - public void testHeaderSampleSpaceFile() { - VCFReader reader = new VCFReader(headerSampleSpaceFile); - Assert.assertTrue(reader.getHeader().hasGenotypingData()); - Assert.assertTrue(reader.getHeader().getGenotypeSamples().size() == 1); - Assert.assertTrue(reader.getHeader().getGenotypeSamples().contains("SAMPLE NAME")); - } -} diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordUnitTest.java deleted file mode 100755 index c71ec5ed6..000000000 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordUnitTest.java +++ /dev/null @@ -1,169 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.junit.Assert; -import org.junit.Test; -import org.junit.BeforeClass; - -import java.util.*; -import java.io.File; -import java.io.FileNotFoundException; - - -/** - * @author aaron - *

- * Class VCFRecordUnitTest - *

- * test the basic functionality of the vcf record - */ -public class VCFRecordUnitTest extends BaseTest { - - @BeforeClass - public static void beforeTests() { - try { - IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); - GenomeLocParser.setupRefContigOrdering(seq); - } catch (FileNotFoundException e) { - throw new StingException("unable to load the sequence dictionary"); - } - } - - /** - * create a fake VCF record - * - * @param infoFields the info fields - * @return a VCFRecord - */ - private static VCFRecord makeFakeVCFRecord(Map infoFields) { - List altBases = new ArrayList(); - altBases.add(new VCFGenotypeEncoding("C")); - altBases.add(new VCFGenotypeEncoding("D1")); - List genotypeObjects = new ArrayList(); - genotypeObjects.add(createGenotype("sample1", "A", "A")); - return new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects); - } - - /** - * create a fake VCF genotype record - * - * @param name the name of the sample - * @param Allele1 the first allele - * @param Allele2 the second allele - * @return a VCFGenotypeRecord - */ - private static VCFGenotypeRecord createGenotype(String name, String Allele1, String Allele2) { - List Alleles = new ArrayList(); - Alleles.add(new VCFGenotypeEncoding(Allele1)); - Alleles.add(new VCFGenotypeEncoding(Allele2)); - VCFGenotypeRecord rec = new VCFGenotypeRecord(name, Alleles, VCFGenotypeRecord.PHASE.PHASED); - rec.setField("AA", "2"); - return rec; - } - - @Test - public void testAddReduntantAlts() { - List altBases = new ArrayList(); - altBases.add(new VCFGenotypeEncoding("C")); - altBases.add(new VCFGenotypeEncoding("D1")); - altBases.add(new VCFGenotypeEncoding("D1")); - List genotypeObjects = new ArrayList(); - genotypeObjects.add(createGenotype("sample1", "A", "A")); - VCFRecord rec = new VCFRecord("A", "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap(), "GT:AA", genotypeObjects); - Assert.assertEquals(2, rec.getAlternateAlleles().size()); - } - - @Test - public void testGetOneGenotype() { - Map infoFields = new HashMap(); - VCFRecord rec = makeFakeVCFRecord(infoFields); - List genotypeObjects = rec.getVCFGenotypeRecords(); - Assert.assertEquals(1, genotypeObjects.size()); - Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1")); - Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size()); - Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString()); - Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString()); - } - - @Test - public void testGetGenotypes() { - Map infoFields = new HashMap(); - VCFRecord rec = makeFakeVCFRecord(infoFields); - rec.addGenotypeRecord(createGenotype("sample2", "C", "A")); - List genotypeObjects = rec.getVCFGenotypeRecords(); - Assert.assertEquals(2, genotypeObjects.size()); - Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1")); - Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size()); - Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString()); - Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString()); - - // assert the second one - Assert.assertTrue(genotypeObjects.get(1).getSampleName().equals("sample2")); - Assert.assertEquals(2, genotypeObjects.get(1).getAlleles().size()); - Assert.assertEquals("C", genotypeObjects.get(1).getAlleles().get(0).toString()); - Assert.assertEquals("A", genotypeObjects.get(1).getAlleles().get(1).toString()); - - } - - @Test - public void testCreateInfoString() { - Map infoFields = new HashMap(); - VCFRecord rec = makeFakeVCFRecord(infoFields); - Assert.assertTrue(rec.createInfoString().equals(".")); - infoFields.put("DP", "50"); - VCFRecord rec2 = makeFakeVCFRecord(infoFields); - Assert.assertTrue(rec2.createInfoString().equals("DP=50")); - rec2.addInfoField("AB", "CD"); - Assert.assertTrue(rec2.createInfoString().equals("DP=50;AB=CD") || rec2.createInfoString().equals("AB=CD;DP=50")); - } - - - @Test - public void testAddAlts() { - Map infoFields = new HashMap(); - VCFRecord rec = makeFakeVCFRecord(infoFields); - rec.addAlternateBase(new VCFGenotypeEncoding("T")); - rec.addAlternateBase(new VCFGenotypeEncoding("T")); - rec.addAlternateBase(new VCFGenotypeEncoding("T")); - rec.addAlternateBase(new VCFGenotypeEncoding("T")); - rec.addAlternateBase(new VCFGenotypeEncoding("T")); - Assert.assertEquals(3,rec.getAlternateAlleles().size()); - } - - /** - * create a fake header of known quantity - * - * @return a fake VCF header - */ - public static VCFHeader createFakeHeader() { - Set metaData = new HashSet(); - metaData.add(new VCFHeaderLine(VCFHeader.FILE_FORMAT_KEY, VCFHeader.VCF_VERSION)); - metaData.add(new VCFHeaderLine("two", "2")); - Set additionalColumns = new HashSet(); - additionalColumns.add("FORMAT"); - additionalColumns.add("sample1"); - return new VCFHeader(metaData, additionalColumns); - } - - private static final String stringRep = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tDP=50\tGT:AA\t0|0:2"; - private static final String stringRep2 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2"; - //private static final String stringRep3 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2"; - - @Test - public void testStringRepresentation() { - Map infoFields = new HashMap(); - infoFields.put("DP", "50"); - VCFRecord rec = makeFakeVCFRecord(infoFields); - String rep = rec.toStringEncoding(createFakeHeader()); - Assert.assertTrue(stringRep.equals(rep)); - rec.addInfoField("AB", "CD"); - String rep2 = rec.toStringEncoding(createFakeHeader()); - Assert.assertTrue(stringRep2.equals(rep2)); - //rec.addGenotypeField(createGenotype("sample3","A","D12")); - } - - -} diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index 113e12377..5089f2b76 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.vcf.*; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.StingException;