From 72ae81c6de7a111a7b2bcb36267ab60fa9baed9d Mon Sep 17 00:00:00 2001 From: aaron Date: Thu, 5 Aug 2010 18:47:53 +0000 Subject: [PATCH] VariantContext has now moved over to Tribble, and the VCF4 parser is now the only VCF parser in town. Other changes include: - Tribble is included directly in the GATK repo; those who have access to commit to Tribble can now directly commit from the GATK directory from Intellij; command line users can commit from inside the tribble directory. - Hapmap ROD now in Tribble; all mentions have been switched over. - VariantContext does not know about GenomeLoc; use VariantContextUtils.getLocation(VariantContext vc) to get a genome loc. - VariantContext.getSNPSubstitutionType is now in VariantContextUtils. - This does not include the checked-in project files for Intellij; still running into issues with changes to the iml files being marked as changes by SVN I'll send out an email to GSAMembers with some more details. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3954 348d0f76-0448-11de-a6fe-93d51630548a --- build.xml | 49 +- ivy.xml | 1 - .../org/broad/tribble/vcf/NameAwareCodec.java | 37 - java/src/org/broad/tribble/vcf/VCF3Codec.java | 119 -- java/src/org/broad/tribble/vcf/VCFCodec.java | 18 - .../tribble/vcf/VCFCompoundHeaderLine.java | 164 --- .../org/broad/tribble/vcf/VCFConstants.java | 84 -- .../tribble/vcf/VCFFilterHeaderLine.java | 70 -- .../tribble/vcf/VCFFormatHeaderLine.java | 28 - .../tribble/vcf/VCFGenotypeEncoding.java | 136 --- .../broad/tribble/vcf/VCFGenotypeRecord.java | 349 ------ java/src/org/broad/tribble/vcf/VCFHeader.java | 165 --- .../org/broad/tribble/vcf/VCFHeaderLine.java | 134 --- .../tribble/vcf/VCFHeaderLineTranslator.java | 116 -- .../broad/tribble/vcf/VCFHeaderLineType.java | 28 - .../broad/tribble/vcf/VCFHeaderVersion.java | 78 -- .../broad/tribble/vcf/VCFInfoHeaderLine.java | 25 - .../broad/tribble/vcf/VCFNamedHeaderLine.java | 30 - .../org/broad/tribble/vcf/VCFReaderUtils.java | 209 ---- java/src/org/broad/tribble/vcf/VCFRecord.java | 652 ----------- .../gatk/contexts/variantcontext/Allele.java | 404 ------- .../contexts/variantcontext/Genotype.java | 216 ---- .../InferredGeneticContext.java | 223 ---- .../variantcontext/MutableGenotype.java | 75 -- .../variantcontext/MutableVariantContext.java | 209 ---- .../variantcontext/VariantContext.java | 1040 ----------------- .../variantcontext/VariantContextUtils.java | 105 +- .../variantcontext/VariantJEXLContext.java | 11 +- .../io/storage/GenotypeWriterStorage.java | 2 +- .../gatk/io/stubs/GenotypeWriterStub.java | 2 +- .../sting/gatk/refdata/HapMapROD.java | 36 - .../sting/gatk/refdata/PlinkRod.java | 2 +- .../gatk/refdata/RefMetaDataTracker.java | 2 +- .../gatk/refdata/VariantContextAdaptors.java | 139 +-- .../features/beagle/BeagleFeature.java | 6 +- .../gatk/refdata/features/vcf4/VCF4Codec.java | 602 ---------- .../tracks/builders/RODTrackBuilder.java | 1 - .../builders/TribbleRMDTrackBuilder.java | 15 +- .../utils/FeatureToGATKFeatureIterator.java | 7 +- .../sting/gatk/walkers/VariantsToVCF.java | 9 +- .../gatk/walkers/annotator/Alignability.java | 2 +- .../gatk/walkers/annotator/AlleleBalance.java | 3 +- .../walkers/annotator/AnnotationByDepth.java | 2 +- .../walkers/annotator/ChromosomeCounts.java | 3 +- .../walkers/annotator/DepthOfCoverage.java | 2 +- .../annotator/DepthPerAlleleBySample.java | 4 +- .../gatk/walkers/annotator/GCContent.java | 2 +- .../walkers/annotator/HaplotypeScore.java | 1 + .../gatk/walkers/annotator/HardyWeinberg.java | 3 +- .../walkers/annotator/HomopolymerRun.java | 2 +- .../sting/gatk/walkers/annotator/LowMQ.java | 2 +- .../walkers/annotator/MappingQualityZero.java | 2 +- .../gatk/walkers/annotator/QualByDepth.java | 3 +- .../walkers/annotator/RMSMappingQuality.java | 2 +- .../gatk/walkers/annotator/RankSumTest.java | 3 +- .../gatk/walkers/annotator/SBByDepth.java | 4 +- .../walkers/annotator/SpanningDeletions.java | 2 +- .../walkers/annotator/VariantAnnotator.java | 5 +- .../annotator/VariantAnnotatorEngine.java | 6 +- .../interfaces/GenotypeAnnotation.java | 4 +- .../interfaces/InfoFieldAnnotation.java | 2 +- .../fasta/FastaAlternateReferenceWalker.java | 2 +- .../gatk/walkers/filters/ClusteredSnps.java | 5 +- .../walkers/filters/FiltrationContext.java | 2 +- .../filters/VariantFiltrationWalker.java | 6 +- .../walkers/genotyper/BatchedCallsMerger.java | 2 +- .../walkers/genotyper/CreateTriggerTrack.java | 2 +- .../DiploidGenotypeCalculationModel.java | 4 +- ...JointEstimateGenotypeCalculationModel.java | 6 +- .../SimpleIndelCalculationModel.java | 6 +- .../walkers/genotyper/UnifiedGenotyper.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 2 +- .../walkers/genotyper/VariantCallContext.java | 2 +- .../gatk/walkers/indels/IndelRealigner.java | 4 +- .../indels/RealignerTargetCreator.java | 5 +- .../recalibration/CovariateCounterWalker.java | 4 +- .../walkers/sequenom/CreateSequenomMask.java | 2 +- .../walkers/sequenom/PickSequenomProbes.java | 9 +- .../sequenom/SequenomValidationConverter.java | 4 +- .../gatk/walkers/varianteval/CompOverlap.java | 4 +- .../varianteval/CountFunctionalClasses.java | 2 +- .../walkers/varianteval/CountVariants.java | 4 +- .../varianteval/GenotypeConcordance.java | 4 +- .../varianteval/IndelLengthHistogram.java | 2 +- .../MendelianViolationEvaluator.java | 6 +- .../varianteval/SimpleMetricsByAC.java | 5 +- .../varianteval/TiTvVariantEvaluator.java | 5 +- .../varianteval/VariantEvalWalker.java | 4 +- .../walkers/varianteval/VariantEvaluator.java | 2 +- .../varianteval/VariantQualityScore.java | 7 +- .../ApplyVariantCuts.java | 4 +- .../GenerateVariantClustersWalker.java | 6 +- .../VariantGaussianMixtureModel.java | 5 +- .../VariantRecalibrator.java | 7 +- .../walkers/variantutils/CombineVariants.java | 2 +- .../variantutils/FilterLiftedVariants.java | 2 +- .../variantutils/LiftoverVariants.java | 4 +- .../walkers/AlleleBalanceHistogramWalker.java | 2 +- .../AnnotationByAlleleFrequencyWalker.java | 2 +- .../walkers/BeagleOutputByDepthWalker.java | 13 +- .../walkers/CreateTiTvTrack.java | 9 +- .../walkers/IndelAnnotator.java | 2 +- .../walkers/IndelDBRateWalker.java | 17 +- .../walkers/MendelianViolationClassifier.java | 13 +- .../walkers/RealignedReadCounter.java | 3 +- .../walkers/TestVariantContextWalker.java | 2 +- .../walkers/VCF4ReaderTestWalker.java | 151 --- .../walkers/VCF4WriterTestWalker.java | 4 +- .../annotator/InsertSizeDistribution.java | 2 +- .../ProportionOfNonrefBasesSupportingSNP.java | 2 +- ...oportionOfRefSecondBasesSupportingSNP.java | 2 +- ...oportionOfSNPSecondBasesSupportingRef.java | 2 +- .../varianteval/AminoAcidTransition.java | 9 +- .../varianteval/SimpleMetricsBySample.java | 7 +- .../varianteval/VariantEvaluatorBySample.java | 4 +- .../gatk/walkers/BeagleOutputToVCFWalker.java | 8 +- .../gatk/walkers/LocusMismatchWalker.java | 5 +- .../walkers/ProduceBeagleInputWalker.java | 11 +- .../gatk/walkers/ReadBackedPhasingWalker.java | 25 +- .../walkers/SnpCallRateByCoverageWalker.java | 4 +- .../gatk/walkers/TrioGenotyperWalker.java | 8 +- .../gatk/walkers/VCFConcordance.java | 8 +- .../walkers/annotator/GenomicAnnotation.java | 4 +- .../walkers/annotator/GenomicAnnotator.java | 2 +- .../gatk/walkers/diagnostics/SNPDensity.java | 3 +- .../graphalign/GraphReferenceBuilder.java | 5 +- .../walkers/graphalign/ReferenceGraph.java | 3 +- .../papergenotyper/GATKPaperGenotyper.java | 3 - .../SecondaryBaseTransitionTableWalker.java | 2 +- .../validation/RodSystemValidationWalker.java | 5 +- .../AnalyzeAnnotationsWalker.java | 2 +- .../AnnotationDataManager.java | 2 +- .../gatk/walkers/vcftools/VariantSelect.java | 2 +- .../gatk/walkers/vcftools/VariantSubset.java | 4 +- .../sting/utils/genotype/CalledGenotype.java | 3 +- .../sting/utils/genotype/GenotypeWriter.java | 2 +- .../utils/genotype/geli/GeliAdapter.java | 7 +- .../utils/genotype/geli/GeliTextWriter.java | 11 +- .../sting/utils/genotype/glf/GLFWriter.java | 9 +- .../vcf/VCFGenotypeWriterAdapter.java | 2 +- .../utils/genotype/vcf/VCFParameters.java | 101 -- .../sting/utils/genotype/vcf/VCFReader.java | 2 +- .../sting/utils/genotype/vcf/VCFUtils.java | 29 +- .../sting/utils/genotype/vcf/VCFWriter.java | 8 +- .../variantcontext/AlleleUnitTest.java | 1 + .../VariantContextUnitTest.java | 48 +- .../VariantJEXLContextUnitTest.java | 4 +- .../VariantContextAdaptorsUnitTest.java | 2 +- .../refdata/features/vcf4/VCF4UnitTest.java | 492 -------- .../builders/IndexPerformanceTests.java | 99 +- .../CombineVariantsIntegrationTest.java | 6 +- .../variantutils/CombineVariantsUnitTest.java | 4 +- .../utils/genotype/vcf/VCFHeaderUnitTest.java | 3 +- .../utils/genotype/vcf/VCFWriterUnitTest.java | 8 +- 154 files changed, 536 insertions(+), 6458 deletions(-) delete mode 100755 java/src/org/broad/tribble/vcf/NameAwareCodec.java delete mode 100755 java/src/org/broad/tribble/vcf/VCF3Codec.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFCodec.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java delete mode 100755 java/src/org/broad/tribble/vcf/VCFConstants.java delete mode 100755 java/src/org/broad/tribble/vcf/VCFFilterHeaderLine.java delete mode 100755 java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFGenotypeEncoding.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFHeader.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFHeaderLine.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFHeaderLineType.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFHeaderVersion.java delete mode 100755 java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java delete mode 100755 java/src/org/broad/tribble/vcf/VCFNamedHeaderLine.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFReaderUtils.java delete mode 100644 java/src/org/broad/tribble/vcf/VCFRecord.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/InferredGeneticContext.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/MutableGenotype.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/MutableVariantContext.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java delete mode 100755 java/src/org/broadinstitute/sting/gatk/refdata/HapMapROD.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java delete mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4ReaderTestWalker.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java delete mode 100644 java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java diff --git a/build.xml b/build.xml index 2031a103a..946e3763b 100644 --- a/build.xml +++ b/build.xml @@ -8,6 +8,9 @@ + + + @@ -108,7 +111,7 @@ - @@ -357,7 +360,8 @@ - + + @@ -378,26 +382,54 @@ - + - + - + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -482,11 +514,14 @@ - + + + + diff --git a/ivy.xml b/ivy.xml index 687461c84..3c9e6a4b0 100644 --- a/ivy.xml +++ b/ivy.xml @@ -17,7 +17,6 @@ - diff --git a/java/src/org/broad/tribble/vcf/NameAwareCodec.java b/java/src/org/broad/tribble/vcf/NameAwareCodec.java deleted file mode 100755 index 348866421..000000000 --- a/java/src/org/broad/tribble/vcf/NameAwareCodec.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broad.tribble.vcf; - -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: Jun 29, 2010 - * Time: 3:48:47 PM - * To change this template use File | Settings | File Templates. - */ -public interface NameAwareCodec { - public String getName(); - public void setName(String name); -} diff --git a/java/src/org/broad/tribble/vcf/VCF3Codec.java b/java/src/org/broad/tribble/vcf/VCF3Codec.java deleted file mode 100755 index 191e4c5b2..000000000 --- a/java/src/org/broad/tribble/vcf/VCF3Codec.java +++ /dev/null @@ -1,119 +0,0 @@ -package org.broad.tribble.vcf; - -import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; -import org.broad.tribble.exception.CodecLineParsingException; -import org.broad.tribble.readers.LineReader; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * - * User: delangel - * - * The reader for VCF 3 files - */ -public class VCF3Codec implements FeatureCodec { - - // we have to store the list of strings that make up the header until they're needed - private List headerStrings = new ArrayList(); - private VCFHeader header = null; - private VCFHeaderVersion version = VCFHeaderVersion.VCF3_3; - - - // some classes need to transform the line before - private LineTransform transformer = null; - - /** - * Fast path to get the location of the Feature for indexing - * @param line the input line to decode - * @return - */ - public Feature decodeLoc(String line) { - return reallyDecode(line, true); - } - - /** - * Decode a line as a Feature. - * - * @param line - * - * @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is - * a comment) - */ - public Feature decode(String line) { - return reallyDecode(line, false); - } - - private Feature reallyDecode(String line, boolean justLocationPlease ) { - // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line - if (line.startsWith("#")) return null; - // transform the line, if we have a transform to do - if (transformer != null) line = transformer.lineTransform(line); - if (line.startsWith("#")) - return null; - - // make a VCFRecord of the line and return it - VCFRecord rec = VCFReaderUtils.createRecord(line, header, justLocationPlease); - if ( ! justLocationPlease ) rec.setHeader(header); - return rec; - } - - /** - * Return the # of header lines for this file. We use this to parse out the header - * - * @return 0 - */ - public Object readHeader(LineReader reader) { - String line = ""; - try { - while ((line = reader.readLine()) != null) { - if (line.startsWith("##")) { - headerStrings.add(line); - } - else if (line.startsWith("#")) { - headerStrings.add(line); - header = VCFReaderUtils.createHeader(headerStrings,version); - return header; - } - else { - throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file"); - } - - } - } catch (IOException e) { - throw new RuntimeException("IO Exception ", e); - } - throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file"); - } - - /** - * @return VCFRecord.class - */ - public Class getFeatureType() { - return VCFRecord.class; - } - - public static interface LineTransform { - public String lineTransform(String line); - } - - public LineTransform getTransformer() { - return transformer; - } - - public void setTransformer(LineTransform transformer) { - this.transformer = transformer; - } - - public VCFHeaderVersion getVersion() { - return version; - } - - public void setVersion(VCFHeaderVersion version) { - this.version = version; - } -} - diff --git a/java/src/org/broad/tribble/vcf/VCFCodec.java b/java/src/org/broad/tribble/vcf/VCFCodec.java deleted file mode 100644 index 9f15a35e7..000000000 --- a/java/src/org/broad/tribble/vcf/VCFCodec.java +++ /dev/null @@ -1,18 +0,0 @@ -package org.broad.tribble.vcf; - -import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - - -/** - * - * @author aaron - * - * Class VCFCodec - * - * The codec for VCF, which relies on VCFReaderUtils to do most of the processing - */ -public class VCFCodec extends VCF4Codec {} diff --git a/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java deleted file mode 100644 index e337791f8..000000000 --- a/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broad.tribble.vcf; - -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.Map; - -/** - * a base class for compound header lines, which include info lines and format lines (so far) - */ -public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { - public enum SupportedHeaderLineType { - INFO(true), FORMAT(false); - - public final boolean allowFlagValues; - SupportedHeaderLineType(boolean flagValues) { - allowFlagValues = flagValues; - } - } - - // the field types - private String name; - private int count; - private String description; - private VCFHeaderLineType type; - - // access methods - public String getName() { return name; } - public int getCount() { return count; } - public String getDescription() { return description; } - public VCFHeaderLineType getType() { return type; } - - // - public void setNumberToUnbounded() { this.count = UNBOUNDED; } - - // our type of line, i.e. format, info, etc - private final SupportedHeaderLineType lineType; - - // line numerical values are allowed to be unbounded (or unknown), which is - // marked with a dot (.) - public static int UNBOUNDED = -1; // the value we store internally for unbounded types - - /** - * create a VCF format header line - * - * @param name the name for this header line - * @param count the count for this header line - * @param type the type for this header line - * @param description the description for this header line - */ - protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) { - super(lineType.toString(), ""); - this.name = name; - this.count = count; - this.type = type; - this.description = description; - this.lineType = lineType; - validate(); - } - - /** - * create a VCF format header line - * - * @param line the header line - * @param version the VCF header version - * - */ - protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) { - super(lineType.toString(), ""); - Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description")); - name = mapping.get("ID"); - count = version == VCFHeaderVersion.VCF4_0 ? - mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) : - mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")); - type = VCFHeaderLineType.valueOf(mapping.get("Type")); - if (type == VCFHeaderLineType.Flag && !allowFlagValues()) - throw new IllegalArgumentException("Flag is an unsupported type for this kind of field"); - - description = mapping.get("Description"); - if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided - description = UNBOUND_DESCRIPTION; - - this.lineType = lineType; - - validate(); - } - - private void validate() { - if ( name == null || type == null || description == null || lineType == null ) - throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s type=%s desc=%s lineType=%s", - super.getKey(), name, type, description, lineType )); - } - - /** - * make a string representation of this header line - * @return a string representation - */ - protected String toStringEncoding() { - Map map = new LinkedHashMap(); - map.put("ID", name); - map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count); - map.put("Type", type); - map.put("Description", description); - return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map); - } - - /** - * returns true if we're equal to another compounder header line - * @param o a compound header line - * @return true if equal - */ - public boolean equals(Object o) { - if ( !(o instanceof VCFCompoundHeaderLine) ) - return false; - VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o; - return name.equals(other.name) && - count == other.count && - description.equals(other.description) && - type == other.type && - lineType == other.lineType; - } - - public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) { - return count == other.count && - type == other.type && - lineType == other.lineType && - name.equals(other.name); - } - - public boolean sameLineTypeAndName(VCFCompoundHeaderLine other) { - return lineType == other.lineType && - name.equals(other.name); - } - - /** - * do we allow flag (boolean) values? (i.e. booleans where you don't have specify the value, AQ means AQ=true) - * @return true if we do, false otherwise - */ - abstract boolean allowFlagValues(); - -} diff --git a/java/src/org/broad/tribble/vcf/VCFConstants.java b/java/src/org/broad/tribble/vcf/VCFConstants.java deleted file mode 100755 index 616261287..000000000 --- a/java/src/org/broad/tribble/vcf/VCFConstants.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broad.tribble.vcf; - -public final class VCFConstants { - // standard INFO/FORMAT field keys - public static final String ANCESTRAL_ALLELE_KEY = "AA"; - public static final String ALLELE_COUNT_KEY = "AC"; - public static final String ALLELE_FREQUENCY_KEY = "AF"; - public static final String ALLELE_NUMBER_KEY = "AN"; - public static final String RMS_BASE_QUALITY_KEY = "BQ"; - public static final String CIGAR_KEY = "CIGAR"; - public static final String DBSNP_KEY = "DB"; - public static final String DEPTH_KEY = "DP"; - public static final String END_KEY = "END"; - public static final String GENOTYPE_FILTER_KEY = "FT"; - public static final String GENOTYPE_KEY = "GT"; - public static final String GENOTYPE_LIKELIHOODS_KEY = "GL"; - public static final String GENOTYPE_QUALITY_KEY = "GQ"; - public static final String HAPMAP2_KEY = "H2"; - public static final String HAPMAP3_KEY = "H3"; - public static final String HAPLOTYPE_QUALITY_KEY = "HQ"; - public static final String RMS_MAPPING_QUALITY_KEY = "MQ"; - public static final String MAPPING_QUALITY_ZERO_KEY = "MQ0"; - public static final String SAMPLE_NUMBER_KEY = "NS"; - public static final String OLD_DEPTH_KEY = "RD"; - public static final String STRAND_BIAS_KEY = "SB"; - public static final String SOMATIC_KEY = "SOMATIC"; - public static final String VALIDATED_KEY = "VALIDATED"; - - // separators - public static final String FORMAT_FIELD_SEPARATOR = ":"; - public static final String GENOTYPE_FIELD_SEPARATOR = ":"; - public static final String FIELD_SEPARATOR = "\t"; - public static final String FILTER_CODE_SEPARATOR = ";"; - public static final String INFO_FIELD_SEPARATOR = ";"; - public static final String UNPHASED = "/"; - public static final String PHASED = "|"; - public static final String PHASED_SWITCH_PROB_v3 = "\\"; - - // missing/default values - public static final String UNFILTERED = "."; - public static final String PASSES_FILTERS_v3 = "0"; - public static final String PASSES_FILTERS_v4 = "PASS"; - public static final String EMPTY_ID_FIELD = "."; - public static final String EMPTY_INFO_FIELD = "."; - public static final String EMPTY_ALTERNATE_ALLELE_FIELD = "."; - public static final String MISSING_VALUE_v4 = "."; - public static final String MISSING_QUALITY_v3 = "-1"; - public static final String MISSING_GENOTYPE_QUALITY_v3 = "-1"; - public static final String MISSING_HAPLOTYPE_QUALITY_v3 = "-1"; - public static final String MISSING_DEPTH_v3 = "-1"; - public static final String UNBOUNDED_ENCODING_v4 = "."; - public static final String UNBOUNDED_ENCODING_v3 = "-1"; - public static final String EMPTY_ALLELE = "."; - public static final String EMPTY_GENOTYPE = "./."; - public static final double MAX_GENOTYPE_QUAL = 99.0; - - public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f"; - public static final String DOUBLE_PRECISION_INT_SUFFIX = ".00"; -} \ No newline at end of file diff --git a/java/src/org/broad/tribble/vcf/VCFFilterHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFFilterHeaderLine.java deleted file mode 100755 index b7869f90d..000000000 --- a/java/src/org/broad/tribble/vcf/VCFFilterHeaderLine.java +++ /dev/null @@ -1,70 +0,0 @@ -package org.broad.tribble.vcf; - -import java.util.Arrays; -import java.util.LinkedHashMap; -import java.util.Map; - - -/** - * @author ebanks - * A class representing a key=value entry for FILTER fields in the VCF header - */ -public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine { - - private String name; - private String description; - - - /** - * create a VCF filter header line - * - * @param name the name for this header line - * @param description the description for this header line - */ - public VCFFilterHeaderLine(String name, String description) { - super("FILTER", ""); - this.name = name; - this.description = description; - - if ( name == null || description == null ) - throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description )); - } - - /** - * create a VCF info header line - * - * @param line the header line - * @param version the vcf header version - */ - protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) { - super("FILTER", ""); - Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description")); - name = mapping.get("ID"); - description = mapping.get("Description"); - if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided - description = UNBOUND_DESCRIPTION; - } - - protected String toStringEncoding() { - Map map = new LinkedHashMap(); - map.put("ID", name); - map.put("Description", description); - return "FILTER=" + VCFHeaderLine.toStringEncoding(map); - } - - public boolean equals(Object o) { - if ( !(o instanceof VCFFilterHeaderLine) ) - return false; - VCFFilterHeaderLine other = (VCFFilterHeaderLine)o; - return name.equals(other.name) && - description.equals(other.description); - } - - public String getName() { - return name; - } - - public String getDescription() { - return description; - } -} \ No newline at end of file diff --git a/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java deleted file mode 100755 index c48c7c1c5..000000000 --- a/java/src/org/broad/tribble/vcf/VCFFormatHeaderLine.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.broad.tribble.vcf; - - -/** - * @author ebanks - *

- * Class VCFFormatHeaderLine - *

- * A class representing a key=value entry for genotype FORMAT fields in the VCF header - */ -public class VCFFormatHeaderLine extends VCFCompoundHeaderLine { - - public VCFFormatHeaderLine(String name, int count, VCFHeaderLineType type, String description) { - super(name, count, type, description, SupportedHeaderLineType.FORMAT); - if (type == VCFHeaderLineType.Flag) - throw new IllegalArgumentException("Flag is an unsupported type for format fields"); - } - - protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) { - super(line, version, SupportedHeaderLineType.FORMAT); - } - - // format fields do not allow flag values (that wouldn't make much sense, how would you encode this in the genotype). - @Override - boolean allowFlagValues() { - return false; - } -} \ No newline at end of file diff --git a/java/src/org/broad/tribble/vcf/VCFGenotypeEncoding.java b/java/src/org/broad/tribble/vcf/VCFGenotypeEncoding.java deleted file mode 100644 index d5c4e30f4..000000000 --- a/java/src/org/broad/tribble/vcf/VCFGenotypeEncoding.java +++ /dev/null @@ -1,136 +0,0 @@ -package org.broad.tribble.vcf; - - - -/** - * @author aaron - *

- * Class VCFGenotypeEncoding - *

- * basic encoding class for genotype fields in VCF - */ -public class VCFGenotypeEncoding { - public enum TYPE { - SINGLE_BASE, - INSERTION, - DELETION, - UNCALLED, - MIXED // this type is only valid in aggregate, not for a single VCFGenotypeEncoding - } - - // our length (0 for SINGLE_BASE), our bases, and our type - private final int mLength; - private final String mBases; - private final TYPE mType; - - // public constructor, that parses out the base string - public VCFGenotypeEncoding(String baseString){ - this(baseString, false); - } - public VCFGenotypeEncoding(String baseString, boolean allowMultipleBaseReference) { - if ((baseString.length() == 1)) { - // are we an empty (no-call) genotype? - if (baseString.equals(VCFConstants.EMPTY_ALLELE)) { - mBases = VCFConstants.EMPTY_ALLELE; - mLength = 0; - mType = TYPE.UNCALLED; - } else if (!validBases(baseString)) { - throw new IllegalArgumentException("Alleles of length 1 must be one of A,C,G,T, " + baseString + " was passed in"); - } else { // we're a valid base - mBases = baseString.toUpperCase(); - mLength = 0; - mType = TYPE.SINGLE_BASE; - } - } else { // deletion or insertion - if (baseString.length() < 1 ||(!allowMultipleBaseReference && (baseString.toUpperCase().charAt(0) != 'D' && baseString.toUpperCase().charAt(0) != 'I'))) { - throw new IllegalArgumentException("Genotype encoding of " + baseString + " was passed in, but is not a valid deletion, insertion, base, or no call (.)"); - } - if (baseString.toUpperCase().charAt(0) == 'D') { - mLength = Integer.valueOf(baseString.substring(1, baseString.length())); - mBases = ""; - mType = TYPE.DELETION; - } else if (baseString.toUpperCase().charAt(0) == 'I') { // we're an I - mBases = baseString.substring(1, baseString.length()).toUpperCase(); - if (!validBases(mBases)) - throw new IllegalArgumentException("The insertion base string contained invalid bases -> " + baseString); - mLength = mBases.length(); - mType = TYPE.INSERTION; - } else{ - mBases = baseString; - mType = TYPE.MIXED; - mLength = mBases.length(); - } - } - } - - public int getLength() { - return mLength; - } - - public String getBases() { - return mBases; - } - - public TYPE getType() { - return mType; - } - - public boolean equals(Object obj) { - if ( obj == null ) - return false; - if ( obj instanceof VCFGenotypeEncoding ) { - VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj; - return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength); - } - if ( mType == TYPE.UNCALLED && obj.toString().equals(VCFConstants.EMPTY_ALLELE) ) - return true; - return false; - } - - public int hashCode() { - // our underlying data is immutable, so this is safe (we won't strand a value in a hashtable somewhere - // when the data changes underneath, altering this value). - String str = this.mBases + String.valueOf(this.mLength) + this.mType.toString(); - return str.hashCode(); - } - - /** - * dump the string representation of this genotype encoding - * - * @return string representation - */ - public String toString() { - StringBuilder builder = new StringBuilder(); - switch (mType) { - case SINGLE_BASE: - case UNCALLED: - case MIXED: - builder.append(mBases); - break; - case INSERTION: - builder.append("I"); - builder.append(mBases); - break; - case DELETION: - builder.append("D"); - builder.append(mLength); - break; - } - return builder.toString(); - } - - /** - * ensure that string contains valid bases - * - * @param bases the bases to check - * - * @return true if they're all either A,C,G,T; false otherwise - */ - private static boolean validBases(String bases) { - for (char c : bases.toUpperCase().toCharArray()) { - if (c != 'A' && c != 'C' && c != 'G' && c != 'T' && c != 'N') - return false; - } - return true; - } -} \ No newline at end of file diff --git a/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java b/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java deleted file mode 100644 index 2d8b51cbe..000000000 --- a/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java +++ /dev/null @@ -1,349 +0,0 @@ -package org.broad.tribble.vcf; - -import org.broadinstitute.sting.utils.Utils; - -import java.util.*; - - -/** - * - * @author aaron - * - * Class VCFGenotypeRecord - * - * the basics of a genotype call in VCF - */ -public class VCFGenotypeRecord { - - public static final double MAX_QUAL_VALUE = 99.0; - - // what kind of phasing this genotype has - public enum PHASE { - UNPHASED("/"), PHASED("|"), PHASED_SWITCH_PROB("\\"); // , UNKNOWN - - String genotypeSeparator; - PHASE(String sep) { this.genotypeSeparator = sep; } - } - - // our record - private VCFRecord mRecord; - - // our phasing - private PHASE mPhaseType; - - // our bases(s) - private final List mGenotypeAlleles = new ArrayList(); - - // our mapping of the format mFields to values - private final Map mFields = new HashMap(); - - // our sample name - private String mSampleName; - - /** - * Create a VCF genotype record - * - * @param sampleName sample name - * @param genotypes list of genotypes - * @param phasing phasing - */ - public VCFGenotypeRecord(String sampleName, List genotypes, PHASE phasing) { - mSampleName = sampleName; - if (genotypes != null) - this.mGenotypeAlleles.addAll(genotypes); - mPhaseType = phasing; - } - - public void setVCFRecord(VCFRecord record) { - mRecord = record; - } - - public void setSampleName(String name) { - mSampleName = name; - } - - /** - * Adds a field to the genotype record. - * Throws an exception if the key is GT, as that's computed internally. - * - * @param key the field name (use static variables above for common fields) - * @param value the field value - */ - public void setField(String key, String value) { - // make sure the GT field isn't being set - if ( key.equals(VCFConstants.GENOTYPE_KEY) ) - throw new IllegalArgumentException("Setting the GT field is not allowed as that's done internally"); - mFields.put(key, value); - } - - /** - * determine the phase of the genotype - * - * @param phase the string that contains the phase character - * - * @return the phase - */ - static PHASE determinePhase(String phase) { - // find the phasing information - for ( PHASE p : PHASE.values() ) { - if (phase.equals(p.genotypeSeparator)) - return p; - } - - throw new IllegalArgumentException("Unknown genotype phasing parameter: " + phase); - } - - - public PHASE getPhaseType() { - return mPhaseType; - } - - public String getSampleName() { - return mSampleName; - } - - public List getAlleles() { - return mGenotypeAlleles; - } - - public Map getFields() { - return mFields; - } - - /** - * @return the phred-scaled quality score - */ - public double getQual() { - return ( mFields.containsKey(VCFConstants.GENOTYPE_QUALITY_KEY) ? Double.valueOf(mFields.get(VCFConstants.GENOTYPE_QUALITY_KEY)) : Double.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3)); - } - - public boolean isMissingQual() { - return VCFConstants.MISSING_GENOTYPE_QUALITY_v3.equals(String.valueOf((int)getQual())); - } - - public double getNegLog10PError() { - return (isMissingQual() ? Double.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3) : getQual() / 10.0); - } - - public int getReadCount() { - return ( mFields.containsKey(VCFConstants.DEPTH_KEY) ? Integer.valueOf(mFields.get(VCFConstants.DEPTH_KEY)) : Integer.valueOf(VCFConstants.MISSING_DEPTH_v3)); - } - - public String getLocation() { - return mRecord != null ? mRecord.getChr() + ":" + mRecord.getPosition() : null; - } - - public String getReference() { - return mRecord != null ? mRecord.getReference() : "N"; - } - - public String getBases() { - String genotype = ""; - for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) - genotype += encoding.getBases(); - return genotype; - } - - public boolean isVariant(char ref) { - for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) { - if ( encoding.getType() == VCFGenotypeEncoding.TYPE.UNCALLED ) - continue; - if ( encoding.getType() != VCFGenotypeEncoding.TYPE.SINGLE_BASE || - encoding.getBases().charAt(0) != ref ) - return true; - } - return false; - } - - public boolean isPointGenotype() { - return (mRecord != null ? !mRecord.isIndel() : true); - } - - public boolean isHom() { - if ( mGenotypeAlleles.size() == 0 ) - return true; - - String bases = mGenotypeAlleles.get(0).getBases(); - for ( int i = 1; i < mGenotypeAlleles.size(); i++ ) { - if ( !bases.equals(mGenotypeAlleles.get(1).getBases()) ) - return false; - } - return true; - } - - public boolean isHet() { - return !isHom(); - } - - public boolean isNoCall() { - for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) { - if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED ) - return false; - } - return true; - } - - public boolean isFiltered() { - return ( mFields.get(VCFConstants.GENOTYPE_FILTER_KEY) != null && - !mFields.get(VCFConstants.GENOTYPE_FILTER_KEY).equals(VCFConstants.UNFILTERED) && - !mFields.get(VCFConstants.GENOTYPE_FILTER_KEY).equals(VCFConstants.PASSES_FILTERS_v3)); - } - - public int getPloidy() { - return mGenotypeAlleles.size(); - } - - public VCFRecord getRecord() { - return mRecord; - } - - private String toGenotypeString(List altAlleles) { - List alleleStrings = new ArrayList(altAlleles.size()); - for (VCFGenotypeEncoding allele : mGenotypeAlleles) { - if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED) - alleleStrings.add(VCFConstants.EMPTY_ALLELE); - else - alleleStrings.add(String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0)); - } - - return Utils.join(mPhaseType.genotypeSeparator, alleleStrings); - } - - @Override - public String toString() { - return String.format("[VCFGenotype %s %s %s %s]", getLocation(), mSampleName, this.mGenotypeAlleles, mFields); - } - - public boolean isEmptyGenotype() { - for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) { - if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED ) - return false; - } - return true; - } - - public boolean equals(Object other) { - if (other instanceof VCFGenotypeRecord) { - if (((VCFGenotypeRecord) other).mPhaseType != this.mPhaseType) return false; - if (!((VCFGenotypeRecord) other).mGenotypeAlleles.equals(this.mGenotypeAlleles)) return false; - if (!((VCFGenotypeRecord) other).mFields.equals(mFields)) return false; - if (!((VCFGenotypeRecord) other).mSampleName.equals(this.mSampleName)) return false; - return true; - } - return false; - } - - /** - * output a string representation of the VCFGenotypeRecord, given the alternate alleles - * - * @param altAlleles the alternate alleles, needed for toGenotypeString() - * @param genotypeFormatStrings genotype format strings - * - * @return a string - */ - public String toStringEncoding(List altAlleles, String[] genotypeFormatStrings) { - return toStringEncoding(altAlleles, genotypeFormatStrings, false); - } - - public String toStringEncoding(List altAlleles, String[] genotypeFormatStrings, boolean doVCF40) { - StringBuilder builder = new StringBuilder(); - builder.append(toGenotypeString(altAlleles)); - - for ( String field : genotypeFormatStrings ) { - if ( field.equals(VCFConstants.GENOTYPE_KEY) ) - continue; - - String value = mFields.get(field); - if ( value == null && field.equals(VCFConstants.OLD_DEPTH_KEY) ) - value = mFields.get(VCFConstants.DEPTH_KEY); - - builder.append(VCFConstants.GENOTYPE_FIELD_SEPARATOR); - if ( value == null || value.equals("") ) - builder.append(getMissingFieldValue(field, doVCF40)); - else - builder.append(value); - } - - return builder.toString(); - } - - /** - * output a string representation of an empty genotype - * - * @param genotypeFormatStrings genotype format strings - * - * @return a string - */ - public static String stringEncodingForEmptyGenotype(String[] genotypeFormatStrings) { - // backward compatibility to VCF 3.3 - return stringEncodingForEmptyGenotype(genotypeFormatStrings, false); - } - public static String stringEncodingForEmptyGenotype(String[] genotypeFormatStrings, boolean doVCF40) { - StringBuilder builder = new StringBuilder(); - builder.append(VCFConstants.EMPTY_GENOTYPE); - - for ( String field : genotypeFormatStrings ) { - if ( field.equals(VCFConstants.GENOTYPE_KEY) ) - continue; - - // in VCF4.0, if a genotype is empty only the ./. key can be included - if (!doVCF40) { - builder.append(VCFConstants.GENOTYPE_FIELD_SEPARATOR); - builder.append(getMissingFieldValue(field)); - } - } - - return builder.toString(); - } - - public static String getMissingFieldValue(String field) { - // backward compatibility to VCF 3.3 - return getMissingFieldValue(field, false); - } - public static String getMissingFieldValue(String field, boolean doVCF40) { - String result; - if (doVCF40) { - result = "."; // default missing value - // TODO - take number of elements in field as input and output corresponding .'s - if ( field.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) ) - result = ".,.,."; - else if ( field.equals(VCFConstants.HAPLOTYPE_QUALITY_KEY) ) - result = ".,."; - - } - else { - result = ""; - - - if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) - result = String.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3); - else if ( field.equals(VCFConstants.DEPTH_KEY) || field.equals(VCFConstants.OLD_DEPTH_KEY) ) - result = String.valueOf(VCFConstants.MISSING_DEPTH_v3); - else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) - result = VCFConstants.UNFILTERED; - else if ( field.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) ) - result = "0,0,0"; - // TODO -- support haplotype quality - //else if ( field.equals(HAPLOTYPE_QUALITY_KEY) ) - // result = String.valueOf(MISSING_HAPLOTYPE_QUALITY); - } - return result; - } - - public static Set getSupportedHeaderStrings(VCFHeaderVersion version) { - Set result = new HashSet(); - result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); - result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); - result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); - result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic")); - //result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality")); - return result; - } - - public void replaceFields(HashMap newFields) { - mFields.clear(); - for ( String s : newFields.keySet() ) { - mFields.put(s,newFields.get(s)); - } - } -} diff --git a/java/src/org/broad/tribble/vcf/VCFHeader.java b/java/src/org/broad/tribble/vcf/VCFHeader.java deleted file mode 100644 index 90dc00d64..000000000 --- a/java/src/org/broad/tribble/vcf/VCFHeader.java +++ /dev/null @@ -1,165 +0,0 @@ -package org.broad.tribble.vcf; - - -import java.util.*; - - -/** - * @author aaron - *

- * Class VCFHeader - *

- * A class representing the VCF header - */ -public class VCFHeader { - - // the mandatory header fields - public enum HEADER_FIELDS { - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO - } - - // the associated meta data - private final Set mMetaData; - private final Map mInfoMetaData = new HashMap(); - private final Map mFormatMetaData = new HashMap(); - - // the list of auxillary tags - private final Set mGenotypeSampleNames = new LinkedHashSet(); - - // the character string that indicates meta data - public static final String METADATA_INDICATOR = "##"; - - // the header string indicator - public static final String HEADER_INDICATOR = "#"; - - /** do we have genotying data? */ - private boolean hasGenotypingData = false; - - /** - * create a VCF header, given a list of meta data and auxillary tags - * - * @param metaData the meta data associated with this header - */ - public VCFHeader(Set metaData) { - mMetaData = new TreeSet(metaData); - loadVCFVersion(); - loadMetaDataMaps(); - } - - /** - * create a VCF header, given a list of meta data and auxillary tags - * - * @param metaData the meta data associated with this header - * @param genotypeSampleNames the genotype format field, and the sample names - */ - public VCFHeader(Set metaData, Set genotypeSampleNames) { - mMetaData = new TreeSet(); - if ( metaData != null ) - mMetaData.addAll(metaData); - for (String col : genotypeSampleNames) { - if (!col.equals("FORMAT")) - mGenotypeSampleNames.add(col); - } - if (genotypeSampleNames.size() > 0) hasGenotypingData = true; - loadVCFVersion(); - loadMetaDataMaps(); - } - - /** - * check our metadata for a VCF version tag, and throw an exception if the version is out of date - * or the version is not present - */ - public void loadVCFVersion() { - List toRemove = new ArrayList(); - for ( VCFHeaderLine line : mMetaData ) - if ( VCFHeaderVersion.isFormatString(line.getKey())) { - toRemove.add(line); - } - // remove old header lines for now, - mMetaData.removeAll(toRemove); - - } - - /** - * load the format/info meta data maps (these are used for quick lookup by key name) - */ - private void loadMetaDataMaps() { - for ( VCFHeaderLine line : mMetaData ) { - if ( line instanceof VCFInfoHeaderLine ) { - VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line; - mInfoMetaData.put(infoLine.getName(), infoLine); - } - else if ( line instanceof VCFFormatHeaderLine ) { - VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line; - mFormatMetaData.put(formatLine.getName(), formatLine); - } - } - } - - /** - * get the header fields in order they're presented in the input file (which is now required to be - * the order presented in the spec). - * - * @return a set of the header fields, in order - */ - public Set getHeaderFields() { - Set fields = new LinkedHashSet(); - for (HEADER_FIELDS field : HEADER_FIELDS.values()) - fields.add(field); - return fields; - } - - /** - * get the meta data, associated with this header - * - * @return a set of the meta data - */ - public Set getMetaData() { - Set lines = new LinkedHashSet(); - lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString())); - lines.addAll(mMetaData); - return lines; - } - - /** - * get the genotyping sample names - * - * @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false - */ - public Set getGenotypeSamples() { - return mGenotypeSampleNames; - } - - /** - * do we have genotyping data? - * - * @return true if we have genotyping columns, false otherwise - */ - public boolean hasGenotypingData() { - return hasGenotypingData; - } - - /** @return the column count */ - public int getColumnCount() { - return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0); - } - - /** - * @param key the header key name - * @return the meta data line, or null if there is none - */ - public VCFInfoHeaderLine getInfoHeaderLine(String key) { - return mInfoMetaData.get(key); - } - - /** - * @param key the header key name - * @return the meta data line, or null if there is none - */ - public VCFFormatHeaderLine getFormatHeaderLine(String key) { - return mFormatMetaData.get(key); - } -} - - - diff --git a/java/src/org/broad/tribble/vcf/VCFHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFHeaderLine.java deleted file mode 100644 index a0021187c..000000000 --- a/java/src/org/broad/tribble/vcf/VCFHeaderLine.java +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broad.tribble.vcf; - -import org.broadinstitute.sting.utils.StingException; - -import java.util.Map; - - -/** - * @author ebanks - *

- * Class VCFHeaderLine - *

- * A class representing a key=value entry in the VCF header - */ -public class VCFHeaderLine implements Comparable { - protected static boolean ALLOW_UNBOUND_DESCRIPTIONS = true; - protected static String UNBOUND_DESCRIPTION = "Not provided in original VCF header"; - - private String mKey = null; - private String mValue = null; - - - /** - * create a VCF header line - * - * @param key the key for this header line - * @param value the value for this header line - */ - public VCFHeaderLine(String key, String value) { - if ( key == null ) - throw new IllegalArgumentException("VCFHeaderLine: key cannot be null: key = " + key); - mKey = key; - mValue = value; - } - - /** - * Get the key - * - * @return the key - */ - public String getKey() { - return mKey; - } - - /** - * Get the value - * - * @return the value - */ - public String getValue() { - return mValue; - } - - public String toString() { - return toStringEncoding(); - } - - /** - * Should be overloaded in sub classes to do subclass specific - * - * @return the string encoding - */ - protected String toStringEncoding() { - return mKey + "=" + mValue; - } - - public boolean equals(Object o) { - if ( !(o instanceof VCFHeaderLine) ) - return false; - return mKey.equals(((VCFHeaderLine)o).getKey()) && mValue.equals(((VCFHeaderLine)o).getValue()); - } - - public int compareTo(Object other) { - return toString().compareTo(other.toString()); - } - - /** - * @param line the line - * @return true if the line is a VCF meta data line, or false if it is not - */ - public static boolean isHeaderLine(String line) { - return line != null && line.length() > 0 && VCFHeader.HEADER_INDICATOR.equals(line.substring(0,1)); - } - - /** - * create a string of a mapping pair for the target VCF version - * @param keyValues a mapping of the key->value pairs to output - * @return a string, correctly formatted - */ - public static String toStringEncoding(Map keyValues) { - StringBuilder builder = new StringBuilder(); - builder.append("<"); - boolean start = true; - for (Map.Entry entry : keyValues.entrySet()) { - if (start) start = false; - else builder.append(","); - - if ( entry.getValue() == null ) throw new StingException("Header problem: unbound value at " + entry + " from " + keyValues); - - builder.append(entry.getKey()); - builder.append("="); - builder.append(entry.getValue().toString().contains(",") || - entry.getValue().toString().contains(" ") || - entry.getKey().equals("Description") ? "\""+ entry.getValue() + "\"" : entry.getValue()); - } - builder.append(">"); - return builder.toString(); - } -} \ No newline at end of file diff --git a/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java b/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java deleted file mode 100644 index 7cac1553f..000000000 --- a/java/src/org/broad/tribble/vcf/VCFHeaderLineTranslator.java +++ /dev/null @@ -1,116 +0,0 @@ -package org.broad.tribble.vcf; - -import java.util.*; - -/** - * A class for translating between vcf header versions - */ -public class VCFHeaderLineTranslator { - private static Map mapping; - - static { - mapping = new HashMap(); - mapping.put(VCFHeaderVersion.VCF4_0,new VCF4Parser()); - mapping.put(VCFHeaderVersion.VCF3_3,new VCF3Parser()); - } - - public static Map parseLine(VCFHeaderVersion version, String valueLine, List expectedTagOrder) { - return mapping.get(version).parseLine(valueLine,expectedTagOrder); - } -} - - -interface VCFLineParser { - public Map parseLine(String valueLine, List expectedTagOrder); -} - - -/** - * a class that handles the to and from disk for VCF 4 lines - */ -class VCF4Parser implements VCFLineParser { - Set bracketed = new HashSet(); - - /** - * parse a VCF4 line - * @param valueLine the line - * @return a mapping of the tags parsed out - */ - public Map parseLine(String valueLine, List expectedTagOrder) { - // our return map - Map ret = new LinkedHashMap(); - - // a builder to store up characters as we go - StringBuilder builder = new StringBuilder(); - - // store the key when we're parsing out the values - String key = ""; - - // where are we in the stream of characters? - int index = 0; - - // are we inside a quotation? we don't special case ',' then - boolean inQuote = false; - - // a little switch machine to parse out the tags. Regex ended up being really complicated and ugly - for (char c: valueLine.toCharArray()) { - switch (c) { - case ('<') : if (index == 0) break; // if we see a open bracket at the beginning, ignore it - case ('>') : if (index == valueLine.length()-1) ret.put(key,builder.toString().trim()); break; // if we see a close bracket, and we're at the end, add an entry to our list - case ('=') : if (!inQuote) { key = builder.toString().trim(); builder = new StringBuilder(); } else { builder.append(c); } break; // at an equals, copy the key and reset the builder - case ('\"') : inQuote = !inQuote; break; // a quote means we ignore ',' in our strings, keep track of it - case (',') : if (!inQuote) { ret.put(key,builder.toString().trim()); builder = new StringBuilder(); break; } // drop the current key value to the return map - default: builder.append(c); // otherwise simply append to the current string - } - index++; - } - - // validate the tags against the expected list - index = 0; - if (ret.size() > expectedTagOrder.size()) throw new IllegalArgumentException("Unexpected tag count " + ret.size() + " in string " + expectedTagOrder.size()); - for (String str : ret.keySet()) { - if (!expectedTagOrder.get(index).equals(str)) throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine); - index++; - } - return ret; - } -} - -class VCF3Parser implements VCFLineParser { - - public Map parseLine(String valueLine, List expectedTagOrder) { - // our return map - Map ret = new LinkedHashMap(); - - // a builder to store up characters as we go - StringBuilder builder = new StringBuilder(); - - // where are we in the stream of characters? - int index = 0; - // where in the expected tag order are we? - int tagIndex = 0; - - // are we inside a quotation? we don't special case ',' then - boolean inQuote = false; - - // a little switch machine to parse out the tags. Regex ended up being really complicated and ugly - for (char c: valueLine.toCharArray()) { - switch (c) { - case ('\"') : inQuote = !inQuote; break; // a quote means we ignore ',' in our strings, keep track of it - case (',') : if (!inQuote) { ret.put(expectedTagOrder.get(tagIndex++),builder.toString()); builder = new StringBuilder(); break; } // drop the current key value to the return map - default: builder.append(c); // otherwise simply append to the current string - } - index++; - } - ret.put(expectedTagOrder.get(tagIndex++),builder.toString()); - - // validate the tags against the expected list - index = 0; - if (tagIndex != expectedTagOrder.size()) throw new IllegalArgumentException("Unexpected tag count " + tagIndex + ", we expected " + expectedTagOrder.size()); - for (String str : ret.keySet()){ - if (!expectedTagOrder.get(index).equals(str)) throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine); - index++; - } - return ret; - } -} \ No newline at end of file diff --git a/java/src/org/broad/tribble/vcf/VCFHeaderLineType.java b/java/src/org/broad/tribble/vcf/VCFHeaderLineType.java deleted file mode 100644 index 1aeb13553..000000000 --- a/java/src/org/broad/tribble/vcf/VCFHeaderLineType.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.broad.tribble.vcf; - -/** - * the type encodings we use for fields in VCF header lines - */ -public enum VCFHeaderLineType { - Integer, Float, String, Character, Flag; - - public Object convert(String value, VCFCompoundHeaderLine.SupportedHeaderLineType hlt) { - switch (this) { - case Integer: - return Math.round(java.lang.Float.valueOf(value)); // this seems like we're allowing off spec values, but use it for now - case Float: - return java.lang.Float.valueOf(value); - case String: - return value; - case Character: - if (value.length()!= 0) - throw new IllegalStateException("INFO_TYPE." + this + " requires fields of length 1, what was provided was " + value); - return value; - case Flag: - if (hlt.allowFlagValues) - return value.equals("0") ? false : true; - default: - throw new IllegalStateException("INFO_TYPE." + this + " doesn't have a set conversion approach"); - } - } -} diff --git a/java/src/org/broad/tribble/vcf/VCFHeaderVersion.java b/java/src/org/broad/tribble/vcf/VCFHeaderVersion.java deleted file mode 100644 index 9803cf04e..000000000 --- a/java/src/org/broad/tribble/vcf/VCFHeaderVersion.java +++ /dev/null @@ -1,78 +0,0 @@ -package org.broad.tribble.vcf; - -/** - * information that identifies each header version - */ -public enum VCFHeaderVersion { - VCF3_2("VCRv3.2","format"), - VCF3_3("VCFv3.3","fileformat"), - VCF4_0("VCFv4.0","fileformat"); - - private final String versionString; - private final String formatString; - - /** - * create the enum, privately, using: - * @param vString the version string - * @param fString the format string - */ - VCFHeaderVersion(String vString, String fString) { - this.versionString = vString; - this.formatString = fString; - } - - /** - * get the header version - * @param version the version string - * @param format the format string - * @return a VCFHeaderVersion object - */ - public static VCFHeaderVersion toHeaderVersion(String version, String format) { - for (VCFHeaderVersion hv : VCFHeaderVersion.values()) - if (hv.versionString.equals(version) && hv.formatString.equals(format)) - return hv; - return null; - } - - /** - * get the header version - * @param version the version string - * @return a VCFHeaderVersion object - */ - public static VCFHeaderVersion toHeaderVersion(String version) { - for (VCFHeaderVersion hv : VCFHeaderVersion.values()) - if (hv.versionString.equals(version)) - return hv; - return null; - } - - /** - * are we a valid version string of some type - * @param version the version string - * @return true if we're valid of some type, false otherwise - */ - public static boolean isVersionString(String version){ - return toHeaderVersion(version) != null; - } - - /** - * are we a valid format string for some type - * @param format the format string - * @return true if we're valid of some type, false otherwise - */ - public static boolean isFormatString(String format){ - for (VCFHeaderVersion hv : VCFHeaderVersion.values()) - if (hv.formatString.equals(format)) - return true; - return false; - } - - - public String getVersionString() { - return versionString; - } - - public String getFormatString() { - return formatString; - } -} diff --git a/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java deleted file mode 100755 index 111338df0..000000000 --- a/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java +++ /dev/null @@ -1,25 +0,0 @@ -package org.broad.tribble.vcf; - - -/** - * @author ebanks - *

- * Class VCFInfoHeaderLine - *

- * A class representing a key=value entry for INFO fields in the VCF header - */ -public class VCFInfoHeaderLine extends VCFCompoundHeaderLine { - public VCFInfoHeaderLine(String name, int count, VCFHeaderLineType type, String description) { - super(name, count, type, description, SupportedHeaderLineType.INFO); - } - - protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) { - super(line, version, SupportedHeaderLineType.INFO); - } - - // info fields allow flag values - @Override - boolean allowFlagValues() { - return true; - } -} diff --git a/java/src/org/broad/tribble/vcf/VCFNamedHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFNamedHeaderLine.java deleted file mode 100755 index 14f0a5d9d..000000000 --- a/java/src/org/broad/tribble/vcf/VCFNamedHeaderLine.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broad.tribble.vcf; - -/** an interface for named header lines **/ -public interface VCFNamedHeaderLine { - String getName(); -} diff --git a/java/src/org/broad/tribble/vcf/VCFReaderUtils.java b/java/src/org/broad/tribble/vcf/VCFReaderUtils.java deleted file mode 100644 index 1aab6948b..000000000 --- a/java/src/org/broad/tribble/vcf/VCFReaderUtils.java +++ /dev/null @@ -1,209 +0,0 @@ -package org.broad.tribble.vcf; - - - -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** The VCFReaderUtils class, which contains a collection of utilities for working with VCF files */ -public class VCFReaderUtils { - - // our pattern matching for the genotype mFields - private static final Pattern gtPattern = Pattern.compile("([0-9\\.]+)([\\\\|\\/])([0-9\\.]*)"); - - /** - * create a VCF header, given an array of strings that all start with at least the # character. This function is - * package protected so that the VCFReaderUtils can access this function - * - * @param headerStrings a list of header strings - * @param version Header version to parse - * @return a VCF Header created from the list of stinrgs - */ - public static VCFHeader createHeader(List headerStrings, VCFHeaderVersion version) { - Set metaData = new TreeSet(); - Set auxTags = new LinkedHashSet(); - // iterate over all the passed in strings - for ( String str : headerStrings ) { - if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) { - String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR); - int arrayIndex = 0; - for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) { - try { - if (field != VCFHeader.HEADER_FIELDS.valueOf(strings[arrayIndex])) - throw new RuntimeException("VCFReaderUtils: we were expecting column name " + field + " but we saw " + strings[arrayIndex]); - } catch (IllegalArgumentException e) { - throw new RuntimeException("VCFReaderUtils: Unknown column name \"" + strings[arrayIndex] + "\", it does not match a known column header name."); - } - arrayIndex++; - } - if ( arrayIndex < strings.length ) { - if ( !strings[arrayIndex].equals("FORMAT") ) - throw new RuntimeException("VCFReaderUtils: we were expecting column name FORMAT but we saw " + strings[arrayIndex]); - arrayIndex++; - } - - while (arrayIndex < strings.length) - auxTags.add(strings[arrayIndex++]); - - } else { - if ( str.startsWith("##INFO=") ) - metaData.add(new VCFInfoHeaderLine(str.substring(7),version)); - else if ( str.startsWith("##FILTER=") ) - metaData.add(new VCFFilterHeaderLine(str.substring(9),version)); - else if ( str.startsWith("##FORMAT=") ) - metaData.add(new VCFFormatHeaderLine(str.substring(9),version)); - else { - int equals = str.indexOf("="); - if ( equals != -1 ) - metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1))); - } - } - } - - return new VCFHeader(metaData, auxTags); - } - - /** - * create the next VCFRecord, given the input line - * - * @param line the line from the file - * @param mHeader the VCF header - * - * @return the VCFRecord - */ - public static VCFRecord createRecord(String line, VCFHeader mHeader) { - return createRecord(line, mHeader, false); - } - - public static VCFRecord createRecord(String line, VCFHeader mHeader, boolean ignoreGenotypes) { - // things we need to make a VCF record - Map values = new HashMap(); - String tokens[] = line.split("\\t"); - - // check to ensure that the column count of tokens is right - if (tokens.length != mHeader.getColumnCount()) { - throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line); - } - - int index = 0; - for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields()) - values.put(field, tokens[index++]); - // if we have genotyping data, we try and extract the genotype fields - if ( ! ignoreGenotypes && mHeader.hasGenotypingData()) { - String mFormatString = tokens[index]; - String keyStrings[] = mFormatString.split(":"); - List genotypeRecords = new ArrayList(); - index++; - String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(","); - for (String str : mHeader.getGenotypeSamples()) { - genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0))); - index++; - } - VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords); - // associate the genotypes with this new record - for ( VCFGenotypeRecord gr : genotypeRecords ) - gr.setVCFRecord(vrec); - return vrec; - - } - return new VCFRecord(values); - } - - /** - * generate a VCF genotype record, given it's format string, the genotype string, and allele info - * - * @param sampleName the sample name - * @param formatString the format string for this record, which contains the keys for the genotype parameters - * @param genotypeString contains the phasing information, allele information, and values for genotype parameters - * @param altAlleles the alternate allele string array, which we index into based on the field parameters - * @param referenceBase the reference base - * - * @return a VCFGenotypeRecord - */ - public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) { - return getVCFGenotype(sampleName, formatString.split(":"), genotypeString, altAlleles, referenceBase); - } - - /** - * generate a VCF genotype record, given it's format string, the genotype string, and allele info - * - * @param sampleName the sample name - * @param keyStrings the split format string for this record, which contains the keys for the genotype parameters - * @param genotypeString contains the phasing information, allele information, and values for genotype parameters - * @param altAlleles the alternate allele string array, which we index into based on the field parameters - * @param referenceBase the reference base - * - * @return a VCFGenotypeRecord - */ - public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) { - // parameters to create the VCF genotype record - HashMap tagToValue = new HashMap(); - VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNPHASED; - List bases = new ArrayList(); - - for (String key : keyStrings) { - String parse; - int nextDivider; - if (!genotypeString.contains(":")) { - nextDivider = genotypeString.length(); - parse = genotypeString; - } else { - nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":"); - parse = genotypeString.substring(0, nextDivider); - } - if (key.equals(VCFConstants.GENOTYPE_KEY)) { - Matcher m = gtPattern.matcher(parse); - if (!m.matches()) - throw new RuntimeException("VCFReaderUtils: Unable to match GT genotype flag to it's expected pattern, the field was: " + parse); - phase = VCFGenotypeRecord.determinePhase(m.group(2)); - addAllele(m.group(1), altAlleles, referenceBase, bases); - if (m.group(3).length() > 0) addAllele(m.group(3), altAlleles, referenceBase, bases); - } else { - if ( parse.length() == 0 ) - parse = VCFGenotypeRecord.getMissingFieldValue(key); - tagToValue.put(key, parse); - } - if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1; - genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length()); - } - if ( bases.size() > 0 && bases.get(0).equals(VCFConstants.EMPTY_ALLELE) ) - tagToValue.clear(); - // catch some common errors, either there are too many field keys or there are two many field values - else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0)) - throw new RuntimeException("VCFReaderUtils: genotype value count doesn't match the key count (expected " - + keyStrings.length + " but saw " + tagToValue.size() + ")"); - else if ( genotypeString.length() > 0 ) - throw new RuntimeException("VCFReaderUtils: genotype string contained additional unprocessed fields: " + genotypeString - + ". This most likely means that the format string is shorter then the value fields."); - - VCFGenotypeRecord rec = new VCFGenotypeRecord(sampleName, bases, phase); - for ( Map.Entry entry : tagToValue.entrySet() ) - rec.setField(entry.getKey(), entry.getValue()); - return rec; - } - - - /** - * add an alternate allele to the list of alleles we have for a VCF genotype record - * - * @param alleleNumber the allele number, as a string - * @param altAlleles the list of alternate alleles - * @param referenceBase the reference base - * @param bases the list of bases for this genotype call - */ - private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { - if (alleleNumber.equals(VCFConstants.EMPTY_ALLELE)) { - bases.add(new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE)); - } else { - int alleleValue = Integer.valueOf(alleleNumber); - // check to make sure the allele value is within bounds - if (alleleValue < 0 || alleleValue > altAlleles.length) - throw new IllegalArgumentException("VCFReaderUtils: the allele value of " + alleleValue + " is out of bounds given the alternate allele list."); - if (alleleValue == 0) - bases.add(new VCFGenotypeEncoding(String.valueOf(referenceBase))); - else - bases.add(new VCFGenotypeEncoding(altAlleles[alleleValue - 1])); - } - } -} diff --git a/java/src/org/broad/tribble/vcf/VCFRecord.java b/java/src/org/broad/tribble/vcf/VCFRecord.java deleted file mode 100644 index 50a673b69..000000000 --- a/java/src/org/broad/tribble/vcf/VCFRecord.java +++ /dev/null @@ -1,652 +0,0 @@ -package org.broad.tribble.vcf; - - -import org.broad.tribble.Feature; -import org.broad.tribble.util.ParsingUtils; - -import java.util.*; - -/** the basic VCF record type */ -public class VCFRecord implements Feature { - - // the reference base - private String mReferenceBases; - // our location - private String mContig; - private int mPosition; - // our id - private String mID; - // the alternate bases - private final List mAlts = new ArrayList(); - // our qual value - private double mQual; - // our filter string - private String mFilterString; - // our info fields -- use a TreeMap to ensure they can be pulled out in order (so it passes integration tests) - private final Map mInfoFields = new TreeMap(); - - // our genotype formatting string - private String mGenotypeFormatString; - - // the vcf header we're associated with - private VCFHeader vcfHeader = null; - - // our genotype sample fields - private final List mGenotypeRecords = new ArrayList(); - - /** - * given a reference base, a location, and the format string, create a VCF record. - * - * @param referenceBases the reference bases to use - * @param contig our contig - * @param start the start location - * @param genotypeFormatString the format string - */ - public VCFRecord(String referenceBases, String contig, int start, String genotypeFormatString) { - setReferenceBase(referenceBases); - setLocation(contig, start); - mGenotypeFormatString = genotypeFormatString; - } - - /** - * given the values for each of the columns, create a VCF record. - * - * @param columnValues a mapping of header strings to values - * @param genotypeFormatString the format string for the genotype records - * @param genotypeRecords the genotype records - */ - public VCFRecord(Map columnValues, String genotypeFormatString, List genotypeRecords) { - extractFields(columnValues); - mGenotypeRecords.addAll(genotypeRecords); - mGenotypeFormatString = genotypeFormatString; - } - - /** - * given the values for each of the columns, create a VCF record. - * - * @param columnValues a mapping of header strings to values - */ - public VCFRecord(Map columnValues) { - extractFields(columnValues); - mGenotypeFormatString = ""; - } - - /** - * create a VCF record - * - * @param referenceBases the reference bases to use - * @param contig the contig this variant is on - * @param position our position - * @param ID our ID string - * @param altBases the list of alternate bases - * @param qual the qual field - * @param filters the filters used on this variant - * @param infoFields the information fields - * @param genotypeFormatString the format string - * @param genotypeObjects the genotype objects - */ - public VCFRecord(String referenceBases, - String contig, - long position, - String ID, - List altBases, - double qual, - String filters, - Map infoFields, - String genotypeFormatString, - List genotypeObjects) { - setReferenceBase(referenceBases); - setLocation(contig, position); - this.mID = ID; - for (VCFGenotypeEncoding alt : altBases) - this.addAlternateBase(alt); - this.setQual(qual); - this.setFilterString(filters); - this.mInfoFields.putAll(infoFields); - this.mGenotypeFormatString = genotypeFormatString; - this.mGenotypeRecords.addAll(genotypeObjects); - } - - /** - * extract the field values from the passed in array - * - * @param columnValues a map of the header fields to values - */ - private void extractFields(Map columnValues) { - String chrom = null; - long position = -1; - - for (VCFHeader.HEADER_FIELDS val : columnValues.keySet()) { - switch (val) { - case CHROM: - chrom = columnValues.get(val); - break; - case POS: - position = Integer.valueOf(columnValues.get(val)); - break; - case ID: - setID(columnValues.get(val)); - break; - case REF: - if (columnValues.get(val).length() != 1) - throw new IllegalArgumentException("Reference base should be a single character"); - setReferenceBase(columnValues.get(val)); - break; - case ALT: - String values[] = columnValues.get(val).split(","); - for (String alt : values) - addAlternateBase(new VCFGenotypeEncoding(alt)); - break; - case QUAL: - setQual(Double.valueOf(columnValues.get(val))); - break; - case FILTER: - setFilterString(columnValues.get(val)); - break; - case INFO: - String vals[] = columnValues.get(val).split(";"); - for (String alt : vals) { - if ( alt.equals(VCFConstants.EMPTY_INFO_FIELD) ) - continue; - String keyVal[] = alt.split("="); - if ( keyVal.length == 1 ) - addInfoField(keyVal[0], ""); - else if (keyVal.length == 2) - addInfoField(keyVal[0], keyVal[1]); - else - throw new IllegalArgumentException("info field key-value pair did not parse into key->value pair: " + alt); - } - break; - } - } - setLocation(chrom, position); - } - - /** - * do we have genotyping data - * - * @return true if we have genotyping data, false otherwise - */ - - public boolean hasGenotypeData() { - return (mGenotypeRecords.size() > 0); - } - - /** - * @return the ID value for this record - */ - public String getID() { - return mID == null ? VCFConstants.EMPTY_ID_FIELD : mID; - } - - /** - * get the reference base - * - * @return either A, T, C, G, or N - */ - public String getReference() { - return mReferenceBases; - } - - /** - * get the alternate allele strings - * - * @return an array of strings representing the alt alleles, or null if there are none - */ - public List getAlternateAlleleList() { - ArrayList alts = new ArrayList(); - for ( VCFGenotypeEncoding alt : mAlts ) - alts.add(alt.getBases()); - return alts; - } - - public List getAlternateAlleles() { - return mAlts; - } - - public boolean hasAlternateAllele() { - for ( VCFGenotypeEncoding alt : mAlts ) { - if ( alt.getType() != VCFGenotypeEncoding.TYPE.UNCALLED ) - return true; - } - - return false; - } - - public boolean isBiallelic() { - return getAlternateAlleles().size() == 1; - } - - public boolean isReference() { - return !hasAlternateAllele(); - } - - public List getAlleleList() { - ArrayList list = new ArrayList(); - list.add(getReference()); - list.addAll(getAlternateAlleleList()); - return list; - } - - public double getNonRefAlleleFrequency() { - if ( mInfoFields.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) { - return Double.valueOf(mInfoFields.get(VCFConstants.ALLELE_FREQUENCY_KEY)); - } else { - // this is the poor man's AF - if ( mInfoFields.containsKey(VCFConstants.ALLELE_COUNT_KEY) && mInfoFields.containsKey(VCFConstants.ALLELE_NUMBER_KEY)) { - String splt[] = mInfoFields.get(VCFConstants.ALLELE_COUNT_KEY).split(","); - if ( splt.length > 0 ) { - return (Double.valueOf(splt[0]) / Double.valueOf(mInfoFields.get(VCFConstants.ALLELE_NUMBER_KEY))); - } - } - } - - return 0.0; - } - - public VCFGenotypeEncoding.TYPE getType() { - VCFGenotypeEncoding.TYPE type = mAlts.get(0).getType(); - for (int i = 1; i < mAlts.size(); i++) { - if ( mAlts.get(i).getType() != type ) - return VCFGenotypeEncoding.TYPE.MIXED; // if we have more than one type, return mixed - } - return type; - } - - public boolean isDeletion() { - return getType() == VCFGenotypeEncoding.TYPE.DELETION; - } - - public boolean isInsertion() { - return getType() == VCFGenotypeEncoding.TYPE.INSERTION; - } - - public boolean isIndel() { - return isDeletion() || isInsertion(); - } - - public boolean isSNP() { - return getType() == VCFGenotypeEncoding.TYPE.SINGLE_BASE; - } - - public boolean isNovel() { - return ( ! isInDBSNP() ) && ( ! isInHapmap() ); - } - - public boolean isInDBSNP() { - return ( ( mID != null && ! mID.equals(".") ) || ( mInfoFields.get(VCFConstants.DBSNP_KEY) != null && mInfoFields.get(VCFConstants.DBSNP_KEY).equals("1") ) ); - } - - public boolean isInHapmap() { - if ( mInfoFields.get(VCFConstants.HAPMAP2_KEY) != null && mInfoFields.get(VCFConstants.HAPMAP2_KEY).equals("1") ) { - return true; - } else { - return ( mInfoFields.get(VCFConstants.HAPMAP3_KEY) != null && mInfoFields.get(VCFConstants.HAPMAP3_KEY).equals("1") ); - } - } - - public char getAlternativeBaseForSNP() { - if ( !isSNP() && !isBiallelic() ) - throw new IllegalStateException("This record does not represent a SNP"); - return mAlts.get(0).getBases().charAt(0); - } - - public char getReferenceForSNP() { - if ( !isSNP() ) - throw new IllegalStateException("This record does not represent a SNP"); - return getReference().charAt(0); - } - - /** - * @return the phred-scaled quality score - */ - public double getQual() { - return mQual; - } - - public int getPosition() { - return mPosition; - } - - public boolean isMissingQual() { - return VCFConstants.MISSING_GENOTYPE_QUALITY_v3.equals(String.valueOf((int)mQual)); - } - - /** - * @return the -log10PError - */ - public double getNegLog10PError() { - return mQual / 10.0; - } - - /** - * get the filter criteria - * - * @return an array of strings representing the filtering criteria, or UNFILTERED if none are applied - */ - public String[] getFilteringCodes() { - if (mFilterString == null) return new String[]{VCFConstants.UNFILTERED}; - return mFilterString.split(VCFConstants.FILTER_CODE_SEPARATOR); - } - - public boolean isFiltered() { - String[] codes = getFilteringCodes(); - return !codes[0].equals(VCFConstants.UNFILTERED) && !codes[0].equals(VCFConstants.PASSES_FILTERS_v3); - } - -// public boolean hasFilteringCodes() { -// return mFilterString != null; -// } - - public String getFilterString() { - return mFilterString; - } - - /** - * get the information key-value pairs as a Map<> - * - * @return a map, of the info key-value pairs - */ - public final Map getInfoValues() { - return mInfoFields; - } - - public List getVCFGenotypeRecords() { - return mGenotypeRecords; - } - - /** - * @return a List of the sample names - */ - public String[] getSampleNames() { - String names[] = new String[mGenotypeRecords.size()]; - for (int i = 0; i < mGenotypeRecords.size(); i++) { - names[i] = mGenotypeRecords.get(i).getSampleName(); - } - return names; - } - - public VCFGenotypeRecord getGenotype(final String sampleName) { - for ( VCFGenotypeRecord rec : getVCFGenotypeRecords() ) { - if ( rec.getSampleName().equals(sampleName) ) { - return rec; - } - } - - return null; - } - - public String getGenotypeFormatString() { - return mGenotypeFormatString; - }// the formatting string for our genotype records - - public void setGenotypeFormatString(String newFormatString) { - mGenotypeFormatString = newFormatString; - } - - public void setReferenceBase(String reference) { - mReferenceBases = reference.toUpperCase(); - } - - public void setLocation(String chrom, long position) { - if ( chrom == null ) - throw new IllegalArgumentException("Chromosomes cannot be missing"); - if ( position < 0 ) - throw new IllegalArgumentException("Position values must be greater than 0"); - this.mContig = chrom; - this.mPosition = (int)position; - } - - public void setID(String ID) { - mID = ID; - } - - public void setQual(double qual) { - if ( qual < 0 && !VCFConstants.MISSING_GENOTYPE_QUALITY_v3.equals(String.valueOf((int)qual)) ) - throw new IllegalArgumentException("Qual values cannot be negative unless they are " + VCFConstants.MISSING_GENOTYPE_QUALITY_v3 + " ('unknown')"); - mQual = qual; - } - - public void setFilterString(String filterString) { - mFilterString = filterString; - } - - public void addGenotypeRecord(VCFGenotypeRecord mGenotypeRecord) { - mGenotypeRecords.add(mGenotypeRecord); - } - - public void setGenotypeRecords(List records) { - mGenotypeRecords.clear(); - for ( VCFGenotypeRecord g : records ) - addGenotypeRecord(g); - } - - /** - * add an alternate base to our alternate base list. All bases are uppercased - * before being added to the list. - * - * @param base the base to add - */ - public void addAlternateBase(VCFGenotypeEncoding base) { - if (!mAlts.contains(base)) mAlts.add(base); - } - - public void setAlternateBases(List bases) { - mAlts.clear(); - for ( VCFGenotypeEncoding e : bases ) - addAlternateBase(e); - } - - /** - * add an info field to the record - * - * @param key the key, from the spec or a user created key - * @param value it's value as a string - */ - public void addInfoField(String key, String value) { - //System.out.printf("Adding info field %s=%s%n", key, value); - mInfoFields.put(key, value); - } - - public void printInfoFields() { - for ( Map.Entry e : mInfoFields.entrySet() ) { - System.out.printf(" Current info field %s=%s this=%s%n", e.getKey(), e.getValue(), this); - } - } - - - /** - * add an info field to the record - * - * @param m A map from info keys to info values - */ - public void addInfoFields(Map m) { - for ( Map.Entry e : m.entrySet() ) - addInfoField(e.getKey(), e.getValue()); - } - - - /** - * the generation of a string representation, which is used by the VCF writer - * - * @param header the VCF header for this VCF Record - * @return a string - */ - public String toStringEncoding(VCFHeader header) { - StringBuilder builder = new StringBuilder(); - - // CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO - builder.append(mContig); - builder.append(VCFConstants.FIELD_SEPARATOR); - builder.append(mPosition); - builder.append(VCFConstants.FIELD_SEPARATOR); - builder.append(getID()); - builder.append(VCFConstants.FIELD_SEPARATOR); - builder.append(getReference()); - builder.append(VCFConstants.FIELD_SEPARATOR); - List alts = getAlternateAlleles(); - if ( alts.size() > 0 ) { - builder.append(alts.get(0)); - for ( int i = 1; i < alts.size(); i++ ) { - builder.append(","); - builder.append(alts.get(i)); - } - } else { - builder.append(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); - } - builder.append(VCFConstants.FIELD_SEPARATOR); - if ( isMissingQual() ) - builder.append(VCFConstants.MISSING_GENOTYPE_QUALITY_v3); - else - builder.append(String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, mQual)); - builder.append(VCFConstants.FIELD_SEPARATOR); - builder.append(ParsingUtils.join(VCFConstants.FILTER_CODE_SEPARATOR, getFilteringCodes())); - builder.append(VCFConstants.FIELD_SEPARATOR); - builder.append(createInfoString()); - - if ( mGenotypeFormatString != null && mGenotypeFormatString.length() > 0 ) { -// try { - addGenotypeData(builder, header); -// } catch (Exception e) { -// if ( validationStringency == VCFGenotypeWriter.VALIDATION_STRINGENCY.STRICT ) -// throw new RuntimeException(e); -// } - } - - return builder.toString(); - } - - /** - * create the info string - * - * @return a string representing the infomation fields - */ - protected String createInfoString() { - StringBuffer info = new StringBuffer(); - boolean isFirst = true; - for (Map.Entry entry : mInfoFields.entrySet()) { - if ( isFirst ) - isFirst = false; - else - info.append(VCFConstants.INFO_FIELD_SEPARATOR); - info.append(entry.getKey()); - if ( entry.getValue() != null && !entry.getValue().equals("") ) { - info.append("="); - info.append(entry.getValue()); - } - } - return info.length() == 0 ? VCFConstants.EMPTY_INFO_FIELD : info.toString(); - } - - /** - * add the genotype data - * - * @param builder the string builder - * @param header the header object - */ - private void addGenotypeData(StringBuilder builder, VCFHeader header) { - Map gMap = genotypeListToMap(getVCFGenotypeRecords()); - - StringBuffer tempStr = new StringBuffer(); - if ( header.getGenotypeSamples().size() < getVCFGenotypeRecords().size() ) { - for ( String sample : gMap.keySet() ) { - if ( !header.getGenotypeSamples().contains(sample) ) - System.err.println("Sample " + sample + " is a duplicate or is otherwise not present in the header"); - else - header.getGenotypeSamples().remove(sample); - } - throw new IllegalStateException("We have more genotype samples than the header specified; please check that samples aren't duplicated"); - } - tempStr.append(VCFConstants.FIELD_SEPARATOR + mGenotypeFormatString); - - String[] genotypeFormatStrings = mGenotypeFormatString.split(":"); - - for ( String genotype : header.getGenotypeSamples() ) { - tempStr.append(VCFConstants.FIELD_SEPARATOR); - if ( gMap.containsKey(genotype) ) { - VCFGenotypeRecord rec = gMap.get(genotype); - tempStr.append(rec.toStringEncoding(mAlts, genotypeFormatStrings)); - gMap.remove(genotype); - } else { - tempStr.append(VCFGenotypeRecord.stringEncodingForEmptyGenotype(genotypeFormatStrings)); - } - } - if ( gMap.size() != 0 ) { - for ( String sample : gMap.keySet() ) - System.err.println("Sample " + sample + " is being genotyped but isn't in the header."); - throw new IllegalStateException("We failed to use all the genotype samples; there must be an inconsistancy between the header and records"); - } - - builder.append(tempStr); - } - - /** - * compare two VCF records - * - * @param other the other VCF record - * @return true if they're equal - */ - public boolean equals(VCFRecord other) { - if (!this.mAlts.equals(other.mAlts)) return false; - if (!this.mReferenceBases.equals(other.mReferenceBases)) return false; - if (!this.mContig.equals(other.mContig)) return false; - if (mPosition != other.mPosition) return false; - if (!this.mID.equals(other.mID)) return false; - if (this.mQual != other.mQual) return false; - if ( this.mFilterString == null ) { - if ( other.mFilterString != null ) return false; - } else if ( !this.mFilterString.equals(other.mFilterString) ) return false; - if (!this.mInfoFields.equals(other.mInfoFields)) return false; - if (!this.mGenotypeRecords.equals(other.mGenotypeRecords)) return false; - return true; - } - - /** - * create a genotype mapping from a list and their sample names - * - * @param list a list of genotype samples - * @return a mapping of the sample name to VCF genotype record - */ - private static Map genotypeListToMap(List list) { - Map map = new HashMap(); - for (int i = 0; i < list.size(); i++) { - VCFGenotypeRecord rec = list.get(i); - map.put(rec.getSampleName(), rec); - } - return map; - } - - /** Return the features reference sequence name, e.g chromosome or contig */ - public String getChr() { - return this.mContig; - } - - /** Return the start position in 1-based coordinates (first base is 1) */ - public int getStart() { - return this.mPosition; - } - - /** - * Return the end position following 1-based fully closed conventions. The length of a feature is - * end - start + 1; - */ - public int getEnd() { - return this.mPosition; - } - - /** - * set the VCF header we're associated with - * @param header the header - */ - void setHeader(VCFHeader header) { - vcfHeader = header; - } - - /** - * get the associated header - * @return the VCF Header - */ - public VCFHeader getHeader() { - return vcfHeader; - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java deleted file mode 100755 index a595bee9b..000000000 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Allele.java +++ /dev/null @@ -1,404 +0,0 @@ -package org.broadinstitute.sting.gatk.contexts.variantcontext; - -import java.util.Arrays; -import java.util.List; -import java.util.ArrayList; -import java.util.Collection; - -/** - * Immutable representation of an allele - * - * Types of alleles: - * - * Ref: a t C g a // C is the reference base - * - * : a t G g a // C base is a G in some individuals - * - * : a t - g a // C base is deleted w.r.t. the reference - * - * : a t CAg a // A base is inserted w.r.t. the reference sequence - * - * In these cases, where are the alleles? - * - * SNP polymorphism of C/G -> { C , G } -> C is the reference allele - * 1 base deletion of C -> { C , - } -> C is the reference allele - * 1 base insertion of A -> { - ; A } -> Null is the reference allele - * - * Suppose I see a the following in the population: - * - * Ref: a t C g a // C is the reference base - * : a t G g a // C base is a G in some individuals - * : a t - g a // C base is deleted w.r.t. the reference - * - * How do I represent this? There are three segregating alleles: - * - * { C , G , - } - * - * Now suppose I have this more complex example: - * - * Ref: a t C g a // C is the reference base - * : a t - g a - * : a t - - a - * : a t CAg a - * - * There are actually four segregating alleles: - * - * { C g , - g, - -, and CAg } over bases 2-4 - * - * However, the molecular equivalence explicitly listed above is usually discarded, so the actual - * segregating alleles are: - * - * { C g, g, -, C a g } - * - * Critically, it should be possible to apply an allele to a reference sequence to create the - * correct haplotype sequence: - * - * Allele + reference => haplotype - * - * For convenience, we are going to create Alleles where the GenomeLoc of the allele is stored outside of the - * Allele object itself. So there's an idea of an A/C polymorphism independent of it's surrounding context. - * - * Given list of alleles it's possible to determine the "type" of the variation - * - * A / C @ loc => SNP with - * - / A => INDEL - * - * If you know where allele is the reference, you can determine whether the variant is an insertion or deletion. - * - * Alelle also supports is concept of a NO_CALL allele. This Allele represents a haplotype that couldn't be - * determined. This is usually represented by a '.' allele. - * - * Note that Alleles store all bases as bytes, in **UPPER CASE**. So 'atc' == 'ATC' from the perspective of an - * Allele. - - * @author ebanks, depristo - */ -public class Allele implements Comparable { - private static final byte[] EMPTY_ALLELE_BASES = new byte[0]; - - private boolean isRef = false; - private boolean isNull = false; - private boolean isNoCall = false; - - private byte[] bases = null; - - public final static String NULL_ALLELE_STRING = "-"; - public final static String NO_CALL_STRING = "."; - /** A generic static NO_CALL allele for use */ - - // no public way to create an allele - private Allele(byte[] bases, boolean isRef) { - // standardize our representation of null allele and bases - if ( wouldBeNullAllele(bases) ) { - bases = EMPTY_ALLELE_BASES; - isNull = true; - } else if ( wouldBeNoCallAllele(bases) ) { - bases = EMPTY_ALLELE_BASES; - isNoCall = true; - if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); - } -// else -// bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance - - this.isRef = isRef; - this.bases = bases; - - if ( ! acceptableAlleleBases(bases,isRef) ) - throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases)); - } - - private Allele(String bases, boolean isRef) { - this(bases.getBytes(), isRef); - } - - - private final static Allele REF_A = new Allele("A", true); - private final static Allele ALT_A = new Allele("A", false); - private final static Allele REF_C = new Allele("C", true); - private final static Allele ALT_C = new Allele("C", false); - private final static Allele REF_G = new Allele("G", true); - private final static Allele ALT_G = new Allele("G", false); - private final static Allele REF_T = new Allele("T", true); - private final static Allele ALT_T = new Allele("T", false); - private final static Allele REF_N = new Allele("N", true); - private final static Allele ALT_N = new Allele("N", false); - private final static Allele REF_NULL = new Allele("-", true); - private final static Allele ALT_NULL = new Allele("-", false); - public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false); - - // --------------------------------------------------------------------------------------------------------- - // - // creation routines - // - // --------------------------------------------------------------------------------------------------------- - - /** - * Create a new Allele that includes bases and if tagged as the reference allele if isRef == true. If bases - * == '-', a Null allele is created. If bases == '.', a no call Allele is created. - * - * @param bases the DNA sequence of this variation, '-', of '.' - * @param isRef should we make this a reference allele? - * @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformated - */ - public static Allele create(byte[] bases, boolean isRef) { - if ( bases == null ) - throw new IllegalArgumentException("create: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele"); - - if ( bases.length == 1 ) { - // optimization to return a static constant Allele for each single base object - switch (bases[0]) { - case '.': - if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); - return NO_CALL; - case '-': return isRef ? REF_NULL : ALT_NULL; - case 'A': return isRef ? REF_A : ALT_A; - case 'C': return isRef ? REF_C : ALT_C; - case 'G': return isRef ? REF_G : ALT_G; - case 'T': return isRef ? REF_T : ALT_T; - case 'N': return isRef ? REF_N : ALT_N; - default: throw new IllegalArgumentException("Illegal base: " + (char)bases[0]); - } - } else { - return new Allele(bases, isRef); - } - } - - public static Allele create(byte base, boolean isRef) { -// public Allele(byte base, boolean isRef) { - return create( new byte[]{ base }, isRef); - } - - public static Allele extend(Allele left, byte[] right) { - byte[] bases = null; - if ( left.length() == 0 ) - bases = right; - else { - bases = new byte[left.length() + right.length]; - System.arraycopy(left.getBases(), 0, bases, 0, left.length()); - System.arraycopy(right, 0, bases, left.length(), right.length); - } - - return create(bases, left.isReference()); - } - - /** - * @param bases bases representing an allele - * @return true if the bases represent the null allele - */ - public static boolean wouldBeNullAllele(byte[] bases) { - return (bases.length == 1 && bases[0] == '-') || bases.length == 0; - } - - /** - * @param bases bases representing an allele - * @return true if the bases represent the NO_CALL allele - */ - public static boolean wouldBeNoCallAllele(byte[] bases) { - return bases.length == 1 && bases[0] == '.'; - } - - /** - * @param bases bases representing an allele - * @param reference is this the reference allele - * @return true if the bases represent the well formatted allele - */ - public static boolean acceptableAlleleBases(String bases, boolean reference) { - return acceptableAlleleBases(bases.getBytes(),reference); - } - - /** - * @param bases bases representing an allele - * @param reference are we the reference (we allow n's in the reference allele) - * @return true if the bases represent the well formatted allele - */ - public static boolean acceptableAlleleBases(byte[] bases, boolean reference) { - if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) ) - return true; - - for ( int i = 0; i < bases.length; i++ ) { - switch (bases[i]) { - case 'A': case 'C': case 'G': case 'T': case 'N' : break; - default: - return false; - } - } - - return true; - } - - /** - * @see Allele(byte[], boolean) - * - * @param bases bases representing an allele - * @param isRef is this the reference allele? - */ - public static Allele create(String bases, boolean isRef) { - //public Allele(String bases, boolean isRef) { - return create(bases.getBytes(), isRef); - } - - - /** - * Creates a non-Ref allele. @see Allele(byte[], boolean) for full information - * - * @param bases bases representing an allele - */ - public static Allele create(String bases) { - return create(bases, false); - } - - /** - * Creates a non-Ref allele. @see Allele(byte[], boolean) for full information - * - * @param bases bases representing an allele - */ - public static Allele create(byte[] bases) { - return create(bases, false); - //this(bases, false); - } - - // --------------------------------------------------------------------------------------------------------- - // - // accessor routines - // - // --------------------------------------------------------------------------------------------------------- - - //Returns true if this is the null allele - public boolean isNull() { return isNull; } - // Returns true if this is not the null allele - public boolean isNonNull() { return ! isNull(); } - - // Returns true if this is the NO_CALL allele - public boolean isNoCall() { return isNoCall; } - // Returns true if this is the not the NO_CALL allele - public boolean isCalled() { return ! isNoCall(); } - - // Returns true if this Allele is the reference allele - public boolean isReference() { return isRef; } - // Returns true if this Allele is not the reference allele - public boolean isNonReference() { return ! isReference(); } - - // Returns a nice string representation of this object - public String toString() { - return (isNull() ? "-" : ( isNoCall() ? "." : new String(getBases()))) + (isReference() ? "*" : ""); - } - - /** - * Return the DNA bases segregating in this allele. Note this isn't reference polarized, - * so the Null allele is represented by a vector of length 0 - * - * @return the segregating bases - */ - public byte[] getBases() { return bases; } - - /** - * @param other the other allele - * - * @return true if these alleles are equal - */ - public boolean equals(Object other) { - return ( ! (other instanceof Allele) ? false : equals((Allele)other, false) ); - } - - /** - * @return hash code - */ - public int hashCode() { - int hash = 1; - for (int i = 0; i < bases.length; i++) - hash += (i+1) * bases[i]; - return hash; - } - - /** - * Returns true if this and other are equal. If ignoreRefState is true, then doesn't require both alleles has the - * same ref tag - * - * @param other allele to compare to - * @param ignoreRefState if true, ignore ref state in comparison - * @return true if this and other are equal - */ - public boolean equals(Allele other, boolean ignoreRefState) { - return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && basesMatch(other.getBases()); - } - - /** - * @param test bases to test against - * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles - */ - public boolean basesMatch(byte[] test) { return bases == test || Arrays.equals(bases, test); } - - /** - * @param test bases to test against - * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles - */ - public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); } - - /** - * @param test allele to test against - * - * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles - */ - public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); } - - /** - * @return the length of this allele. Null and NO_CALL alleles have 0 length. - */ - public int length() { - return bases.length; - } - - // --------------------------------------------------------------------------------------------------------- - // - // useful static functions - // - // --------------------------------------------------------------------------------------------------------- - - public static Allele getMatchingAllele(Collection allAlleles, String alleleBases) { - return getMatchingAllele(allAlleles, alleleBases.getBytes()); - } - - public static Allele getMatchingAllele(Collection allAlleles, byte[] alleleBases) { - for ( Allele a : allAlleles ) { - if ( a.basesMatch(alleleBases) ) { - return a; - } - } - - if ( wouldBeNoCallAllele(alleleBases) ) - return NO_CALL; - else - return null; // couldn't find anything - } - - public static List resolveAlleles(List possibleAlleles, List alleleStrings) { - List myAlleles = new ArrayList(alleleStrings.size()); - - for ( String alleleString : alleleStrings ) { - Allele allele = getMatchingAllele(possibleAlleles, alleleString); - - if ( allele == null ) { - if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) { - allele = create(alleleString); - } else { - throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles); - } - } - - myAlleles.add(allele); - } - - return myAlleles; - } - - public int compareTo(Allele other) { - if ( isReference() && other.isNonReference() ) - return -1; - else if ( isNonReference() && other.isReference() ) - return 1; - else - return new String(getBases()).compareTo(new String(other.getBases())); // todo -- potential performance issue - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java deleted file mode 100755 index 16b1edb3b..000000000 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/Genotype.java +++ /dev/null @@ -1,216 +0,0 @@ -package org.broadinstitute.sting.gatk.contexts.variantcontext; - -import org.broadinstitute.sting.utils.Utils; - -import java.util.*; - -/** - * This class encompasses all the basic information about a genotype. It is immutable. - * - * @author Mark DePristo - */ -public class Genotype { - - public final static String PHASED_ALLELE_SEPARATOR = "|"; - public final static String UNPHASED_ALLELE_SEPARATOR = "/"; - - protected InferredGeneticContext commonInfo; - public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR; - protected List alleles = null; // new ArrayList(); - - private boolean genotypesArePhased = false; - private boolean filtersWereAppliedToContext; - - public Genotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean genotypesArePhased) { - this.alleles = Collections.unmodifiableList(alleles); - commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes); - filtersWereAppliedToContext = filters != null; - this.genotypesArePhased = genotypesArePhased; - validate(); - } - - public Genotype(String sampleName, List alleles, double negLog10PError) { - this(sampleName, alleles, negLog10PError, null, null, false); - } - - public Genotype(String sampleName, List alleles) { - this(sampleName, alleles, NO_NEG_LOG_10PERROR, null, null, false); - } - - /** - * @return the alleles for this genotype - */ - public List getAlleles() { - return alleles; - } - - public List getAlleles(Allele allele) { - List al = new ArrayList(); - for ( Allele a : alleles ) - if ( a.equals(allele) ) - al.add(a); - - return Collections.unmodifiableList(al); - } - - public Allele getAllele(int i) { - return alleles.get(i); - } - - public boolean genotypesArePhased() { return genotypesArePhased; } - - /** - * @return the ploidy of this genotype - */ - public int getPloidy() { return alleles.size(); } - - public enum Type { - NO_CALL, - HOM_REF, - HET, - HOM_VAR - } - - public Type getType() { - Allele firstAllele = alleles.get(0); - - if ( firstAllele.isNoCall() ) { - return Type.NO_CALL; - } - - for (Allele a : alleles) { - if ( ! firstAllele.equals(a) ) - return Type.HET; - } - return firstAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR; - } - - /** - * @return true if all observed alleles are the same (regardless of whether they are ref or alt) - */ - public boolean isHom() { return isHomRef() || isHomVar(); } - public boolean isHomRef() { return getType() == Type.HOM_REF; } - public boolean isHomVar() { return getType() == Type.HOM_VAR; } - - /** - * @return true if we're het (observed alleles differ) - */ - public boolean isHet() { return getType() == Type.HET; } - - /** - * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF) - */ - public boolean isNoCall() { return getType() == Type.NO_CALL; } - public boolean isCalled() { return getType() != Type.NO_CALL; } - - public void validate() { - // todo -- add validation checking here - - if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles"); - if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles"); - - int nNoCalls = 0; - for ( Allele allele : alleles ) { - if ( allele == null ) - throw new IllegalArgumentException("BUG: allele cannot be null in Genotype"); - nNoCalls += allele.isNoCall() ? 1 : 0; - } - if ( nNoCalls > 0 && nNoCalls != alleles.size() ) - throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); - } - - public String getGenotypeString() { - return getGenotypeString(true); - } - - public String getGenotypeString(boolean ignoreRefState) { - // Notes: - // 1. Make sure to use the appropriate separator depending on whether the genotype is phased - // 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele) - // 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course) - return Utils.join(genotypesArePhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR, - ignoreRefState ? getAlleleStrings() : (genotypesArePhased() ? getAlleles() : Utils.sorted(getAlleles()))); - } - - private List getAlleleStrings() { - List al = new ArrayList(); - for ( Allele a : alleles ) - al.add(new String(a.getBases())); - - return al; - } - - public String toString() { - return String.format("[GT: %s %s %s Q%.2f %s]", getSampleName(), getGenotypeString(false), getType(), getPhredScaledQual(), Utils.sortedString(getAttributes())); - } - - public String toBriefString() { - return String.format("%s:Q%.2f", getGenotypeString(false), getPhredScaledQual()); - } - - public boolean sameGenotype(Genotype other) { - return sameGenotype(other, true); - } - - public boolean sameGenotype(Genotype other, boolean ignorePhase) { - if ( getPloidy() != other.getPloidy() ) - return false; // gotta have the same number of allele to be equal for gods sake - - // algorithms are wildly different if phase is kept of ignored - if ( ignorePhase ) { - for ( int i = 0; i < getPloidy(); i++) { - Allele myAllele = getAllele(i); - Allele otherAllele = other.getAllele(i); - if ( ! myAllele.basesMatch(otherAllele) ) - return false; - } - } else { - List otherAlleles = new ArrayList(other.getAlleles()); - for ( Allele myAllele : getAlleles() ) { - Allele alleleToRemove = null; - for ( Allele otherAllele : otherAlleles ) { - if ( myAllele.basesMatch(otherAllele) ) { - alleleToRemove = otherAllele; - break; - } - } - - if ( alleleToRemove != null ) - otherAlleles.remove(alleleToRemove); - else - return false; // we couldn't find our allele - } - } - - return true; - } - - // --------------------------------------------------------------------------------------------------------- - // - // get routines to access context info fields - // - // --------------------------------------------------------------------------------------------------------- - public String getSampleName() { return commonInfo.getName(); } - public Set getFilters() { return commonInfo.getFilters(); } - public boolean isFiltered() { return commonInfo.isFiltered(); } - public boolean isNotFiltered() { return commonInfo.isNotFiltered(); } - public boolean filtersWereApplied() { return filtersWereAppliedToContext; } - public boolean hasNegLog10PError() { return commonInfo.hasNegLog10PError(); } - public double getNegLog10PError() { return commonInfo.getNegLog10PError(); } - public double getPhredScaledQual() { return commonInfo.getPhredScaledQual(); } - - public Map getAttributes() { return commonInfo.getAttributes(); } - public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); } - public Object getAttribute(String key) { return commonInfo.getAttribute(key); } - - public Object getAttribute(String key, Object defaultValue) { - return commonInfo.getAttribute(key, defaultValue); - } - - public String getAttributeAsString(String key) { return commonInfo.getAttributeAsString(key); } - public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); } - public int getAttributeAsInt(String key) { return commonInfo.getAttributeAsInt(key); } - public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); } - public double getAttributeAsDouble(String key) { return commonInfo.getAttributeAsDouble(key); } - public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/InferredGeneticContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/InferredGeneticContext.java deleted file mode 100755 index f6258ce36..000000000 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/InferredGeneticContext.java +++ /dev/null @@ -1,223 +0,0 @@ -package org.broadinstitute.sting.gatk.contexts.variantcontext; - -import org.broadinstitute.sting.utils.StingException; - -import java.util.*; - - -/** - * Common utility routines for VariantContext and Genotype - * - * @author depristo - */ -final class InferredGeneticContext { - public static final double NO_NEG_LOG_10PERROR = -1.0; - - private static Set NO_FILTERS = Collections.unmodifiableSet(new HashSet()); - private static Map NO_ATTRIBUTES = Collections.unmodifiableMap(new HashMap()); - - private double negLog10PError = NO_NEG_LOG_10PERROR; - private String name = null; - private Set filters = NO_FILTERS; - private Map attributes = NO_ATTRIBUTES; - -// public InferredGeneticContext(String name) { -// this.name = name; -// } -// -// public InferredGeneticContext(String name, double negLog10PError) { -// this(name); -// setNegLog10PError(negLog10PError); -// } - - public InferredGeneticContext(String name, double negLog10PError, Set filters, Map attributes) { - this.name = name; - setNegLog10PError(negLog10PError); - if ( filters != null ) - setFilters(filters); - if ( attributes != null ) - setAttributes(attributes); - } - - /** - * @return the name - */ - public String getName() { - return name; - } - - /** - * Sets the name - * - * @param name the name associated with this information - */ - public void setName(String name) { - if ( name == null ) throw new IllegalArgumentException("Name cannot be null " + this); - this.name = name; - } - - - // --------------------------------------------------------------------------------------------------------- - // - // Filter - // - // --------------------------------------------------------------------------------------------------------- - - public Set getFilters() { - return Collections.unmodifiableSet(filters); - } - - public boolean isFiltered() { - return filters.size() > 0; - } - - public boolean isNotFiltered() { - return ! isFiltered(); - } - - public void addFilter(String filter) { - if ( filters == NO_FILTERS ) // immutable -> mutable - filters = new HashSet(filters); - - if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this); - if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this); - filters.add(filter); - } - - public void addFilters(Collection filters) { - if ( filters == null ) throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this); - for ( String f : filters ) - addFilter(f); - } - - public void clearFilters() { - if ( filters == NO_FILTERS ) - filters = new HashSet(); - else - filters.clear(); - } - - public void setFilters(Collection filters) { - clearFilters(); - addFilters(filters); - } - - // --------------------------------------------------------------------------------------------------------- - // - // Working with log error rates - // - // --------------------------------------------------------------------------------------------------------- - - public boolean hasNegLog10PError() { - return getNegLog10PError() != NO_NEG_LOG_10PERROR; - } - - /** - * @return the -1 * log10-based error estimate - */ - public double getNegLog10PError() { return negLog10PError; } - public double getPhredScaledQual() { return getNegLog10PError() * 10; } - - public void setNegLog10PError(double negLog10PError) { - if ( negLog10PError < 0 && negLog10PError != NO_NEG_LOG_10PERROR ) throw new IllegalArgumentException("BUG: negLog10PError cannot be < than 0 : " + negLog10PError); - if ( Double.isInfinite(negLog10PError) ) throw new IllegalArgumentException("BUG: negLog10PError should not be Infinity"); - if ( Double.isNaN(negLog10PError) ) throw new IllegalArgumentException("BUG: negLog10PError should not be NaN"); - - this.negLog10PError = negLog10PError; - } - - // --------------------------------------------------------------------------------------------------------- - // - // Working with attributes - // - // --------------------------------------------------------------------------------------------------------- - public void clearAttributes() { - if ( attributes == NO_ATTRIBUTES ) - attributes = new HashMap(); - else - this.attributes.clear(); - } - - /** - * @return the attribute map - */ - public Map getAttributes() { - return Collections.unmodifiableMap(attributes); - } - - // todo -- define common attributes as enum - - public void setAttributes(Map map) { - clearAttributes(); - putAttributes(map); - } - - public void putAttribute(String key, Object value) { - putAttribute(key, value, false); - } - - public void putAttribute(String key, Object value, boolean allowOverwrites) { - if ( hasAttribute(key) && ! allowOverwrites ) - throw new StingException("Attempting to overwrite key->value binding: key = " + key + " this = " + this); - - if ( attributes == NO_ATTRIBUTES ) // immutable -> mutable - attributes = new HashMap(attributes); - - this.attributes.put(key, value); - } - - public void removeAttribute(String key) { - if ( attributes == NO_ATTRIBUTES ) // immutable -> mutable - attributes = new HashMap(attributes); - this.attributes.remove(key); - } - - public void putAttributes(Map map) { - if ( map != null ) { - for ( Map.Entry elt : map.entrySet() ) { - putAttribute(elt.getKey(), elt.getValue()); - } - } - } - - public boolean hasAttribute(String key) { - return attributes.containsKey(key); - } - - public int getNumAttributes() { - return attributes.size(); - } - - /** - * @param key the attribute key - * - * @return the attribute value for the given key (or null if not set) - */ - public Object getAttribute(String key) { - return attributes.get(key); - } - - public Object getAttribute(String key, Object defaultValue) { - if ( hasAttribute(key) ) - return attributes.get(key); - else - return defaultValue; - } - -// public AttributedObject getAttributes(Collection keys) { -// AttributedObject selected = new AttributedObject(); -// -// for ( Object key : keys ) -// selected.putAttribute(key, this.getAttribute(key)); -// -// return selected; -// } - - public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); } - public int getAttributeAsInt(String key) { return (Integer)getAttribute(key); } - public double getAttributeAsDouble(String key) { return (Double)getAttribute(key); } - - public String getAttributeAsString(String key, String defaultValue) { return (String)getAttribute(key, defaultValue); } - public int getAttributeAsInt(String key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); } - public double getAttributeAsDouble(String key, double defaultValue) { return (Double)getAttribute(key, defaultValue); } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/MutableGenotype.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/MutableGenotype.java deleted file mode 100755 index 206c6d948..000000000 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/MutableGenotype.java +++ /dev/null @@ -1,75 +0,0 @@ -package org.broadinstitute.sting.gatk.contexts.variantcontext; - -import java.util.*; - -/** - * This class emcompasses all the basic information about a genotype. It is immutable. - * - * @author Mark DePristo - */ -public class MutableGenotype extends Genotype { - public MutableGenotype(Genotype parent) { - super(parent.getSampleName(), parent.getAlleles(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes(), parent.genotypesArePhased()); - } - - public MutableGenotype(String sampleName, Genotype parent) { - super(sampleName, parent.getAlleles(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes(), parent.genotypesArePhased()); - } - - - public MutableGenotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean genotypesArePhased) { - super(sampleName, alleles, negLog10PError, filters, attributes, genotypesArePhased); - } - - public MutableGenotype(String sampleName, List alleles, double negLog10PError) { - super(sampleName, alleles, negLog10PError); - } - - public MutableGenotype(String sampleName, List alleles) { - super(sampleName, alleles); - } - - public Genotype unmodifiableGenotype() { - return new Genotype(getSampleName(), getAlleles(), getNegLog10PError(), getFilters(), getAttributes(), genotypesArePhased()); - } - - - /** - * - * @param alleles list of alleles - */ - public void setAlleles(List alleles) { - this.alleles = new ArrayList(alleles); - - // todo -- add validation checking here - - if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles"); - if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles"); - - int nNoCalls = 0; - for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; } - if ( nNoCalls > 0 && nNoCalls != alleles.size() ) - throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); - - for ( Allele allele : alleles ) - if ( allele == null ) throw new IllegalArgumentException("BUG: Cannot add a null allele to a genotype"); - } - - // --------------------------------------------------------------------------------------------------------- - // - // InferredGeneticContext mutation operators - // - // --------------------------------------------------------------------------------------------------------- - public void setName(String name) { commonInfo.setName(name); } - public void addFilter(String filter) { commonInfo.addFilter(filter); } - public void addFilters(Collection filters) { commonInfo.addFilters(filters); } - public void clearFilters() { commonInfo.clearFilters(); } - public void setFilters(Collection filters) { commonInfo.setFilters(filters); } - public void setAttributes(Map map) { commonInfo.setAttributes(map); } - public void putAttribute(String key, Object value) { commonInfo.putAttribute(key, value); } - public void removeAttribute(String key) { commonInfo.removeAttribute(key); } - public void putAttributes(Map map) { commonInfo.putAttributes(map); } - public void setNegLog10PError(double negLog10PError) { commonInfo.setNegLog10PError(negLog10PError); } - public void putAttribute(String key, Object value, boolean allowOverwrites) { commonInfo.putAttribute(key, value, allowOverwrites); } - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/MutableVariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/MutableVariantContext.java deleted file mode 100755 index de62c9916..000000000 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/MutableVariantContext.java +++ /dev/null @@ -1,209 +0,0 @@ -package org.broadinstitute.sting.gatk.contexts.variantcontext; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; - -import java.util.*; - -/** - * Mutable version of VariantContext - * - * @author depristo - */ -public class MutableVariantContext extends VariantContext { - // --------------------------------------------------------------------------------------------------------- - // - // constructors - // - // --------------------------------------------------------------------------------------------------------- - - public MutableVariantContext(String name, GenomeLoc loc, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { - super(name, loc, alleles, genotypes, negLog10PError, filters, attributes); - } - - public MutableVariantContext(String name, GenomeLoc loc, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes) { - super(name, loc, alleles, genotypes, negLog10PError, filters, attributes); - } - - public MutableVariantContext(String name, GenomeLoc loc, Collection alleles) { - this(name, loc, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); - } - - public MutableVariantContext(String name, GenomeLoc loc, Collection alleles, Collection genotypes) { - this(name, loc, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); - } - - public MutableVariantContext(VariantContext parent) { - this(parent.getName(), parent.getLocation(), parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes()); - } - - /** - * Sets the alleles segregating in this context to the collect of alleles. Each of which must be unique according - * to equals() in Allele. Validate() should be called when you are done modifying the context. - * - * @param alleles - */ - public void setAlleles(Collection alleles) { - this.alleles.clear(); - for ( Allele a : alleles ) - addAllele(a); - } - - /** - * Adds allele to the segregating allele list in this context to the collection of alleles. The new - * allele must be be unique according to equals() in Allele. - * Validate() should be called when you are done modifying the context. - * - * @param allele - */ - public void addAllele(Allele allele) { - final boolean allowDuplicates = false; // used to be a parameter - - type = null; - - for ( Allele a : alleles ) { - if ( a.basesMatch(allele) && ! allowDuplicates ) - throw new IllegalArgumentException("Duplicate allele added to VariantContext" + this); - } - - // we are a novel allele - alleles.add(allele); - } - - public void clearGenotypes() { - this.genotypes.clear(); - } - - /** - * Adds this single genotype to the context, not allowing duplicate genotypes to be added - * @param genotype - */ - public void addGenotypes(Genotype genotype) { - putGenotype(genotype.getSampleName(), genotype, false); - } - - /** - * Adds these genotypes to the context, not allowing duplicate genotypes to be added - * @param genotypes - */ - public void addGenotypes(Collection genotypes) { - for ( Genotype g : genotypes ) { - addGenotype(g); - } - } - - /** - * Adds these genotype to the context, not allowing duplicate genotypes to be added. - * @param genotypes - */ - public void addGenotypes(Map genotypes) { - - for ( Map.Entry elt : genotypes.entrySet() ) { - addGenotype(elt.getValue()); - } - } - - /** - * Adds these genotypes to the context. - * - * @param genotypes - */ - public void putGenotypes(Map genotypes) { - for ( Map.Entry g : genotypes.entrySet() ) - putGenotype(g.getKey(), g.getValue()); - } - - /** - * Adds these genotypes to the context. - * - * @param genotypes - */ - public void putGenotypes(Collection genotypes) { - for ( Genotype g : genotypes ) - putGenotype(g); - } - - /** - * Adds this genotype to the context, throwing an error if it's already bound. - * - * @param genotype - */ - public void addGenotype(Genotype genotype) { - addGenotype(genotype.getSampleName(), genotype); - } - - /** - * Adds this genotype to the context, throwing an error if it's already bound. - * - * @param genotype - */ - public void addGenotype(String sampleName, Genotype genotype) { - putGenotype(sampleName, genotype, false); - } - - /** - * Adds this genotype to the context. - * - * @param genotype - */ - public void putGenotype(Genotype genotype) { - putGenotype(genotype.getSampleName(), genotype); - } - - /** - * Adds this genotype to the context. - * - * @param genotype - */ - public void putGenotype(String sampleName, Genotype genotype) { - putGenotype(sampleName, genotype, true); - } - - private void putGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) { - if ( hasGenotype(sampleName) && ! allowOverwrites ) - throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this); - - if ( ! sampleName.equals(genotype.getSampleName()) ) - throw new StingException("Sample name doesn't equal genotype.getSample(): " + sampleName + " genotype=" + genotype); - - this.genotypes.put(sampleName, genotype); - } - - /** - * Removes the binding from sampleName to genotype. If this doesn't exist, throws an IllegalArgumentException - * @param sampleName - */ - public void removeGenotype(String sampleName) { - if ( ! this.genotypes.containsKey(sampleName) ) - throw new IllegalArgumentException("Sample name isn't contained in genotypes " + sampleName + " genotypes =" + genotypes); - - this.genotypes.remove(sampleName); - } - - /** - * Removes genotype from the context. If this doesn't exist, throws an IllegalArgumentException - * @param genotype - */ - public void removeGenotype(Genotype genotype) { - removeGenotype(genotype.getSampleName()); - } - - // todo -- add replace genotype routine - - // --------------------------------------------------------------------------------------------------------- - // - // InferredGeneticContext mutation operators - // - // --------------------------------------------------------------------------------------------------------- - public void setName(String name) { commonInfo.setName(name); } - public void addFilter(String filter) { commonInfo.addFilter(filter); } - public void addFilters(Collection filters) { commonInfo.addFilters(filters); } - public void clearFilters() { commonInfo.clearFilters(); } - public void setFilters(Collection filters) { commonInfo.setFilters(filters); } - public void setAttributes(Map map) { commonInfo.setAttributes(map); } - public void putAttribute(String key, Object value) { commonInfo.putAttribute(key, value); } - public void removeAttribute(String key) { commonInfo.removeAttribute(key); } - public void putAttributes(Map map) { commonInfo.putAttributes(map); } - public void setNegLog10PError(double negLog10PError) { commonInfo.setNegLog10PError(negLog10PError); } - public void putAttribute(String key, Object value, boolean allowOverwrites) { commonInfo.putAttribute(key, value, allowOverwrites); } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java deleted file mode 100755 index 4de954d6c..000000000 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java +++ /dev/null @@ -1,1040 +0,0 @@ -package org.broadinstitute.sting.gatk.contexts.variantcontext; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broad.tribble.Feature; - -import java.util.*; - -/** - * Class VariantContext - * - * == High-level overview == - * - * The VariantContext object is a single general class system for representing genetic variation data composed of: - * - * * Allele: representing single genetic haplotypes (A, T, ATC, -) - * * Genotype: an assignment of alleles for each chromosome of a single named sample at a particular locus - * * VariantContext: an abstract class holding all segregating alleles at a locus as well as genotypes - * for multiple individuals containing alleles at that locus - * - * The class system works by defining segregating alleles, creating a variant context representing the segregating - * information at a locus, and potentially creating and associating genotypes with individuals in the context. - * - * All of the classes are highly validating -- call validate() if you modify them -- so you can rely on the - * self-consistency of the data once you have a VariantContext in hand. The system has a rich set of assessor - * and manipulator routines, as well as more complex static support routines in VariantContextUtils. - * - * The VariantContext (and Genotype) objects are attributed (supporting addition of arbitrary key/value pairs) and - * filtered (can represent a variation that is viewed as suspect). - * - * VariantContexts are dynamically typed, so whether a VariantContext is a SNP, Indel, or NoVariant depends - * on the properties of the alleles in the context. See the detailed documentation on the Type parameter below. - * - * It's also easy to create subcontexts based on selected genotypes. - * - * == Working with Variant Contexts == - * By default, VariantContexts are immutable. In order to access (in the rare circumstances where you need them) - * setter routines, you need to create MutableVariantContexts and MutableGenotypes. - * - * === Some example data === - * - * Allele A, Aref, T, Tref; - * Allele del, delRef, ATC, ATCref; - * - * A [ref] / T at 10 - * GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10); - * - * - / ATC [ref] from 20-23 - * GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22); - * - * // - [ref] / ATC immediately after 20 - * GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20); - * - * === Alleles === - * - * See the documentation in the Allele class itself - * - * What are they? - * - * Alleles can be either reference or non-reference - * - * Example alleles used here: - * - * del = new Allele("-"); - * A = new Allele("A"); - * Aref = new Allele("A", true); - * T = new Allele("T"); - * ATC = new Allele("ATC"); - * - * === Creating variant contexts === - * - * ==== By hand ==== - * - * Here's an example of a A/T polymorphism with the A being reference: - * - *
- * VariantContext vc = new VariantContext(name, snpLoc, Arrays.asList(Aref, T));
- * 
- * - * If you want to create a non-variant site, just put in a single reference allele - * - *
- * VariantContext vc = new VariantContext(name, snpLoc, Arrays.asList(Aref));
- * 
- * - * A deletion is just as easy: - * - *
- * VariantContext vc = new VariantContext(name, delLoc, Arrays.asList(ATCref, del));
- * 
- * - * The only 2 things that distinguishes between a insertion and deletion are the reference allele - * and the location of the variation. An insertion has a Null reference allele and at least - * one non-reference Non-Null allele. Additionally, the location of the insertion is immediately after - * a 1-bp GenomeLoc (at say 20). - * - *
- * VariantContext vc = new VariantContext("name", insLoc, Arrays.asList(delRef, ATC));
- * 
- * - * ==== Converting rods and other data structures to VCs ==== - * - * You can convert many common types into VariantContexts using the general function: - * - *
- * VariantContextAdaptors.convertToVariantContext(name, myObject)
- * 
- * - * dbSNP and VCFs, for example, can be passed in as myObject and a VariantContext corresponding to that - * object will be returned. A null return type indicates that the type isn't yet supported. This is the best - * and easiest way to create contexts using RODs. - * - * - * === Working with genotypes === - * - *
- * List alleles = Arrays.asList(Aref, T);
- * Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "g1", 10);
- * Genotype g2 = new Genotype(Arrays.asList(Aref, T), "g2", 10);
- * Genotype g3 = new Genotype(Arrays.asList(T, T), "g3", 10);
- * VariantContext vc = new VariantContext(snpLoc, alleles, Arrays.asList(g1, g2, g3));
- * 
- * - * At this point we have 3 genotypes in our context, g1-g3. - * - * You can assess a good deal of information about the genotypes through the VariantContext: - * - *
- * vc.hasGenotypes()
- * vc.isMonomorphic()
- * vc.isPolymorphic()
- * vc.getSampleNames().size()
- *
- * vc.getGenotypes()
- * vc.getGenotypes().get("g1")
- * vc.hasGenotype("g1")
- *
- * vc.getChromosomeCount()
- * vc.getChromosomeCount(Aref)
- * vc.getChromosomeCount(T)
- * 
- * - * === NO_CALL alleles === - * - * The system allows one to create Genotypes carrying special NO_CALL alleles that aren't present in the - * set of context alleles and that represent undetermined alleles in a genotype: - * - * Genotype g4 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "NO_DATA_FOR_SAMPLE", 10); - * - * - * === subcontexts === - * It's also very easy get subcontext based only the data in a subset of the genotypes: - * - *
- * VariantContext vc12 = vc.subContextFromGenotypes(Arrays.asList(g1,g2));
- * VariantContext vc1 = vc.subContextFromGenotypes(Arrays.asList(g1));
- * 
- * - * @author depristo - */ -public class VariantContext implements Feature { // to enable tribble intergration - protected InferredGeneticContext commonInfo = null; - public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR; - public final static String REFERENCE_BASE_FOR_INDEL_KEY = "REFERENCE_BASE_FOR_INDEL"; - public final static String ID_KEY = "ID"; - - /** The location of this VariantContext */ - private GenomeLoc loc; - - /** The type (cached for performance reasons) of this context */ - protected Type type = null; - - /** A set of the alleles segregating in this context */ - protected Set alleles = null; - - /** A mapping from sampleName -> genotype objects for all genotypes associated with this context */ - protected Map genotypes = null; - - /** Counts for each of the possible Genotype types in this context */ - protected int[] genotypeCounts = null; - - protected final static Map NO_GENOTYPES = Collections.unmodifiableMap(new HashMap()); - - // a fast cached access point to the ref / alt alleles for biallelic case - private Allele REF = null; - - // set to the alt allele when biallelic, otherwise == null - private Allele ALT = null; - - // were filters applied? - private boolean filtersWereAppliedToContext; - - // --------------------------------------------------------------------------------------------------------- - // - // constructors - // - // --------------------------------------------------------------------------------------------------------- - - - /** - * the complete constructor. Makes a complete VariantContext from its arguments - * - * @param name name - * @param loc location - * @param alleles alleles - * @param genotypes genotypes map - * @param negLog10PError qual - * @param filters filters: use null for unfiltered and empty set for passes filters - * @param attributes attributes - */ - public VariantContext(String name, GenomeLoc loc, Collection alleles, Map genotypes, double negLog10PError, Set filters, Map attributes) { - if ( loc == null ) { throw new StingException("GenomeLoc cannot be null"); } - this.loc = loc; - this.commonInfo = new InferredGeneticContext(name, negLog10PError, filters, attributes); - filtersWereAppliedToContext = filters != null; - - if ( alleles == null ) { throw new StingException("Alleles cannot be null"); } - // we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles - this.alleles = Collections.unmodifiableSet(alleleCollectionToSet(new LinkedHashSet(), alleles)); - - if ( genotypes == null ) { genotypes = NO_GENOTYPES; } - this.genotypes = Collections.unmodifiableMap(genotypes); - - // cache the REF and ALT alleles - int nAlleles = alleles.size(); - for ( Allele a : alleles ) { - if ( a.isReference() ) { - REF = a; - } else if ( nAlleles == 2 ) { // only cache ALT when biallelic - ALT = a; - } - } - - validate(); - } - - /** - * Create a new VariantContext - * - * @param name name - * @param loc location - * @param alleles alleles - * @param genotypes genotypes set - * @param negLog10PError qual - * @param filters filters: use null for unfiltered and empty set for passes filters - * @param attributes attributes - */ - public VariantContext(String name, GenomeLoc loc, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { - this(name, loc, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes); - } - - /** - * Create a new variant context without genotypes and no Perror, no filters, and no attributes - * @param name name - * @param loc location - * @param alleles alleles - */ - public VariantContext(String name, GenomeLoc loc, Collection alleles) { - this(name, loc, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); - } - - /** - * Create a new variant context without genotypes and no Perror, no filters, and no attributes - * @param name name - * @param loc location - * @param alleles alleles - * @param genotypes genotypes - */ - public VariantContext(String name, GenomeLoc loc, Collection alleles, Collection genotypes) { - this(name, loc, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); - } - - /** - * Copy constructor - * - * @param other the VariantContext to copy - */ - public VariantContext(VariantContext other) { - this(other.getName(), other.getLocation(), other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.getFilters(), other.getAttributes()); - } - - - // --------------------------------------------------------------------------------------------------------- - // - // Selectors - // - // --------------------------------------------------------------------------------------------------------- - - /** - * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype - * genotype and alleles in genotype. This is the right way to test if a single genotype is actually - * variant or not. - * - * @param genotype genotype - * @return vc subcontext - */ - public VariantContext subContextFromGenotypes(Genotype genotype) { - return subContextFromGenotypes(Arrays.asList(genotype)); - } - - - /** - * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes - * genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually - * variant or not. - * - * @param genotypes genotypes - * @return vc subcontext - */ - public VariantContext subContextFromGenotypes(Collection genotypes) { - return new VariantContext(getName(), getLocation(), allelesOfGenotypes(genotypes), genotypes, getNegLog10PError(), getFilters(), getAttributes()); - } - - /** - * helper routine for subcontext - * @param genotypes genotypes - * @return allele set - */ - private Set allelesOfGenotypes(Collection genotypes) { - Set alleles = new HashSet(); - - boolean addedref = false; - for ( Genotype g : genotypes ) { - for ( Allele a : g.getAlleles() ) { - addedref = addedref || a.isReference(); - if ( a.isCalled() ) - alleles.add(a); - } - } - if ( ! addedref ) alleles.add(getReference()); - - return alleles; - } - - // --------------------------------------------------------------------------------------------------------- - // - // type operations - // - // --------------------------------------------------------------------------------------------------------- - - /** - * see: http://www.ncbi.nlm.nih.gov/bookshelf/br.fcgi?book=handbook&part=ch5&rendertype=table&id=ch5.ch5_t3 - * - * Format: - * dbSNP variation class - * Rules for assigning allele classes - * Sample allele definition - * - * Single Nucleotide Polymorphisms (SNPs)a - * Strictly defined as single base substitutions involving A, T, C, or G. - * A/T - * - * Deletion/Insertion Polymorphisms (DIPs) - * Designated using the full sequence of the insertion as one allele, and either a fully - * defined string for the variant allele or a '-' character to specify the deleted allele. - * This class will be assigned to a variation if the variation alleles are of different lengths or - * if one of the alleles is deleted ('-'). - * T/-/CCTA/G - * - * No-variation - * Reports may be submitted for segments of sequence that are assayed and determined to be invariant - * in the sample. - * (NoVariation) - * - * Mixed - * Mix of other classes - * - * Also supports NO_VARIATION type, used to indicate that the site isn't polymorphic in the population - * - * - * Not currently supported: - * - * Heterozygous sequencea - * The term heterozygous is used to specify a region detected by certain methods that do not - * resolve the polymorphism into a specific sequence motif. In these cases, a unique flanking - * sequence must be provided to define a sequence context for the variation. - * (heterozygous) - * - * Microsatellite or short tandem repeat (STR) - * Alleles are designated by providing the repeat motif and the copy number for each allele. - * Expansion of the allele repeat motif designated in dbSNP into full-length sequence will - * be only an approximation of the true genomic sequence because many microsatellite markers are - * not fully sequenced and are resolved as size variants only. - * (CAC)8/9/10/11 - * - * Named variant - * Applies to insertion/deletion polymorphisms of longer sequence features, such as retroposon - * dimorphism for Alu or line elements. These variations frequently include a deletion '-' indicator - * for the absent allele. - * (alu) / - - * - * Multi-Nucleotide Polymorphism (MNP) - * Assigned to variations that are multi-base variations of a single, common length - * GGA/AGT - */ - public enum Type { - NO_VARIATION, - SNP, - MNP, // a multi-nucleotide polymorphism - INDEL, - MIXED, - } - - /** - * Determines (if necessary) and returns the type of this variation by examining the alleles it contains. - * - * @return the type of this VariantContext - **/ - public Type getType() { - if ( type == null ) - determineType(); - - return type; - } - - /** - * convenience method for SNPs - * - * @return true if this is a SNP, false otherwise - */ - public boolean isSNP() { return getType() == Type.SNP; } - - public BaseUtils.BaseSubstitutionType getSNPSubstitutionType() { - if ( ! isSNP() || ! isBiallelic() ) throw new IllegalStateException("Requested SNP substitution type for bialleic non-SNP " + this); - return BaseUtils.SNPSubstitutionType(getReference().getBases()[0], getAlternateAllele(0).getBases()[0]); - } - - /** If this is a BiAlleic SNP, is it a transition? */ - public boolean isTransition() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSITION; } - - /** If this is a BiAlleic SNP, is it a transversion? */ - public boolean isTransversion() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSVERSION; } - - /** - * convenience method for variants - * - * @return true if this is a variant allele, false if it's reference - */ - public boolean isVariant() { return getType() != Type.NO_VARIATION; } - - /** - * convenience method for indels - * - * @return true if this is an indel, false otherwise - */ - public boolean isIndel() { return getType() == Type.INDEL; } - - /** - * @return true if the alleles indicate a simple insertion (i.e., the reference allele is Null) - */ - public boolean isInsertion() { - return getType() == Type.INDEL && getReference().isNull(); - } - - /** - * @return true if the alleles indicate a simple deletion (i.e., a single alt allele that is Null) - */ - public boolean isDeletion() { - return getType() == Type.INDEL && ! isInsertion(); - } - - /** - * convenience method for indels - * - * @return true if this is an mixed variation, false otherwise - */ - public boolean isMixed() { return getType() == Type.MIXED; } - - - // --------------------------------------------------------------------------------------------------------- - // - // Generic accessors - // - // --------------------------------------------------------------------------------------------------------- - - /** - * @return the location of this context - */ - public GenomeLoc getLocation() { return loc; } - - - // --------------------------------------------------------------------------------------------------------- - // - // get routines to access context info fields - // - // --------------------------------------------------------------------------------------------------------- - public String getName() { return commonInfo.getName(); } - public Set getFilters() { return commonInfo.getFilters(); } - public boolean isFiltered() { return commonInfo.isFiltered(); } - public boolean isNotFiltered() { return commonInfo.isNotFiltered(); } - public boolean filtersWereApplied() { return filtersWereAppliedToContext; } - public boolean hasNegLog10PError() { return commonInfo.hasNegLog10PError(); } - public double getNegLog10PError() { return commonInfo.getNegLog10PError(); } - public double getPhredScaledQual() { return commonInfo.getPhredScaledQual(); } - - public Map getAttributes() { return commonInfo.getAttributes(); } - public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); } - public Object getAttribute(String key) { return commonInfo.getAttribute(key); } - - public Object getAttribute(String key, Object defaultValue) { - return commonInfo.getAttribute(key, defaultValue); - } - - public String getAttributeAsString(String key) { return commonInfo.getAttributeAsString(key); } - public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); } - public int getAttributeAsInt(String key) { return commonInfo.getAttributeAsInt(key); } - public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); } - public double getAttributeAsDouble(String key) { return commonInfo.getAttributeAsDouble(key); } - public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); } - - - // --------------------------------------------------------------------------------------------------------- - // - // Working with alleles - // - // --------------------------------------------------------------------------------------------------------- - - /** - * @return the reference allele for this context - */ - public Allele getReference() { - Allele ref = REF; - if ( ref == null ) - throw new StingException("BUG: no reference allele found at " + this); - return ref; - } - - /** Private helper routine that grabs the reference allele but doesn't throw an error if there's no such allele */ - -// private Allele getReferenceWithoutError() { -// for ( Allele allele : getAlleles() ) { -// if ( allele.isReference() ) { -// return allele; -// } -// } -// -// return null; -// } - - /** - * @return true if the context is strictly bi-allelic - */ - public boolean isBiallelic() { - return getNAlleles() == 2; - } - - /** - * @return The number of segregating alleles in this context - */ - public int getNAlleles() { - return alleles.size(); - } - - /** - * @return The allele sharing the same bases as this String. A convenience method; better to use byte[] - */ - public Allele getAllele(String allele) { - return getAllele(allele.getBytes()); - } - - /** - * @return The allele sharing the same bases as this byte[], or null if no such allele is present. - */ - public Allele getAllele(byte[] allele) { - return Allele.getMatchingAllele(getAlleles(), allele); - } - - /** - * @return True if this context contains Allele allele, or false otherwise - */ - public boolean hasAllele(Allele allele) { - return hasAllele(allele, false); - } - - public boolean hasAllele(Allele allele, boolean ignoreRefState) { - if ( allele == REF || allele == ALT ) // optimization for cached cases - return true; - - for ( Allele a : getAlleles() ) { - if ( a.equals(allele, ignoreRefState) ) - return true; - } - - return false; - } - - - /** - * Gets the alleles. This method should return all of the alleles present at the location, - * including the reference allele. There are no constraints imposed on the ordering of alleles - * in the set. If the reference is not an allele in this context it will not be included. - * - * @return the set of alleles - */ - public Set getAlleles() { return alleles; } - - /** - * Gets the alternate alleles. This method should return all the alleles present at the location, - * NOT including the reference allele. There are no constraints imposed on the ordering of alleles - * in the set. - * - * @return the set of alternate alleles - */ - public Set getAlternateAlleles() { - HashSet altAlleles = new HashSet(); - for ( Allele allele : alleles ) { - if ( allele.isNonReference() ) - altAlleles.add(allele); - } - - return Collections.unmodifiableSet(altAlleles); - } - - /** - * Gets the sizes of the alternate alleles if they are insertion/deletion events, and returns a list of their sizes - * - * @return a list of indel lengths ( null if not of type indel or mixed ) - */ - public List getIndelLengths() { - if ( getType() != Type.INDEL && getType() != Type.MIXED ) { - return null; - } - - List lengths = new ArrayList(); - for ( Allele a : getAlternateAlleles() ) { - lengths.add(a.length() - getReference().length()); - } - - return lengths; - } - - /** - * @param i -- the ith allele (from 0 to n - 2 for a context with n alleles including a reference allele) - * @return the ith non-reference allele in this context - * @throws IllegalArgumentException if i is invalid - */ - public Allele getAlternateAllele(int i) { - int n = 0; - - for ( Allele allele : alleles ) { - if ( allele.isNonReference() && n++ == i ) - return allele; - } - - throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this); - } - - // --------------------------------------------------------------------------------------------------------- - // - // Working with genotypes - // - // --------------------------------------------------------------------------------------------------------- - - /** - * @return the number of samples in the context - */ - public int getNSamples() { return genotypes.size(); } - - /** - * @return true if the context has associated genotypes - */ - public boolean hasGenotypes() { return genotypes.size() > 0; } - - public boolean hasGenotypes(Collection sampleNames) { - for ( String name : sampleNames ) { - if ( ! genotypes.containsKey(name) ) - return false; - } - return true; - } - - /** - * @return set of all Genotypes associated with this context - */ - public Map getGenotypes() { return genotypes; } - - public List getGenotypesSortedByName() { return Utils.sorted(genotypes); } - - /** - * Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map - * for consistency with the multi-get function. - * - * @param sampleName - * @return - * @throws IllegalArgumentException if sampleName isn't bound to a genotype - */ - public Map getGenotypes(String sampleName) { - return getGenotypes(Arrays.asList(sampleName)); - } - - /** - * Returns a map from sampleName -> Genotype for each sampleName in sampleNames. Returns a map - * for consistency with the multi-get function. - * - * @param sampleNames a unique list of sample names - * @return - * @throws IllegalArgumentException if sampleName isn't bound to a genotype - */ - public Map getGenotypes(Collection sampleNames) { - HashMap map = new HashMap(); - - for ( String name : sampleNames ) { - if ( map.containsKey(name) ) throw new IllegalArgumentException("Duplicate names detected in requested samples " + sampleNames); - map.put(name, getGenotype(name)); - } - - return map; - } - - /** - * @return the set of all sample names in this context - */ - public Set getSampleNames() { - return getGenotypes().keySet(); - } - - /** - * @param sample the sample name - * - * @return the Genotype associated with the given sample in this context or null if the sample is not in this context - */ - public Genotype getGenotype(String sample) { - return getGenotypes().get(sample); - } - - public boolean hasGenotype(String sample) { - return getGenotypes().containsKey(sample); - } - - public Genotype getGenotype(int ith) { - return getGenotypesSortedByName().get(ith); - } - - - /** - * Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS - * - * @return chromosome count - */ - public int getChromosomeCount() { - int n = 0; - - for ( Genotype g : getGenotypes().values() ) { - n += g.isNoCall() ? 0 : g.getPloidy(); - } - - return n; - } - - /** - * Returns the number of chromosomes carrying allele A in the genotypes - * - * @param a allele - * @return chromosome count - */ - public int getChromosomeCount(Allele a) { - int n = 0; - - for ( Genotype g : getGenotypes().values() ) { - n += g.getAlleles(a).size(); - } - - return n; - } - - /** - * Genotype-specific functions -- are the genotypes monomorphic w.r.t. to the alleles segregating at this - * site? That is, is the number of alternate alleles among all fo the genotype == 0? - * - * @return true if it's monomorphic - */ - public boolean isMonomorphic() { - return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount(); - } - - /** - * Genotype-specific functions -- are the genotypes polymorphic w.r.t. to the alleles segregating at this - * site? That is, is the number of alternate alleles among all fo the genotype > 0? - * - * @return true if it's polymorphic - */ - public boolean isPolymorphic() { - return ! isMonomorphic(); - } - - private void calculateGenotypeCounts() { - if ( genotypeCounts == null ) { - genotypeCounts = new int[Genotype.Type.values().length]; - - for ( Genotype g : getGenotypes().values() ) { - if ( g.isNoCall() ) - genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++; - else if ( g.isHomRef() ) - genotypeCounts[Genotype.Type.HOM_REF.ordinal()]++; - else if ( g.isHet() ) - genotypeCounts[Genotype.Type.HET.ordinal()]++; - else if ( g.isHomVar() ) - genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++; - else - throw new StingException("Genotype of unknown type: " + g); - } - } - } - - /** - * Genotype-specific functions -- how many no-calls are there in the genotypes? - * - * @return number of no calls - */ - public int getNoCallCount() { - calculateGenotypeCounts(); - return genotypeCounts[Genotype.Type.NO_CALL.ordinal()]; - } - - /** - * Genotype-specific functions -- how many hom ref calls are there in the genotypes? - * - * @return number of hom ref calls - */ - public int getHomRefCount() { - calculateGenotypeCounts(); - return genotypeCounts[Genotype.Type.HOM_REF.ordinal()]; - } - - /** - * Genotype-specific functions -- how many het calls are there in the genotypes? - * - * @return number of het calls - */ - public int getHetCount() { - calculateGenotypeCounts(); - return genotypeCounts[Genotype.Type.HET.ordinal()]; - } - - /** - * Genotype-specific functions -- how many hom var calls are there in the genotypes? - * - * @return number of hom var calls - */ - public int getHomVarCount() { - return genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]; - } - - // --------------------------------------------------------------------------------------------------------- - // - // validation - // - // --------------------------------------------------------------------------------------------------------- - - /** - * To be called by any modifying routines - */ - private boolean validate() { - return validate(true); - } - - private boolean validate(boolean throwException) { - try { - validateAlleles(); - validateGenotypes(); - } catch ( IllegalArgumentException e ) { - if ( throwException ) - throw e; - else - return false; - } - - return true; - } - - private void validateAlleles() { - // check alleles - boolean alreadySeenRef = false, alreadySeenNull = false; - for ( Allele allele : alleles ) { - // make sure there's only one reference allele - if ( allele.isReference() ) { - if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this); - alreadySeenRef = true; - } - - if ( allele.isNoCall() ) { - throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this); - } - - // make sure there's only one null allele - if ( allele.isNull() ) { - if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this); - alreadySeenNull = true; - } - } - - // make sure there's one reference allele - if ( ! alreadySeenRef ) - throw new IllegalArgumentException("No reference allele found in VariantContext"); - -// if ( getType() == Type.INDEL ) { -// if ( getReference().length() != (getLocation().size()-1) ) { - if ( (getReference().isNull() && getLocation().size() != 1 ) || - (getReference().isNonNull() && (getLocation().size() - getReference().length() > 1))) { - throw new IllegalStateException("BUG: GenomeLoc " + getLocation() + " has a size == " + getLocation().size() + " but the variation reference allele has length " + getReference().length() + " this = " + this); - } - } - - private void validateGenotypes() { - if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null"); - - for ( Map.Entry elt : this.genotypes.entrySet() ) { - String name = elt.getKey(); - Genotype g = elt.getValue(); - - if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName()); - - for ( Allele gAllele : g.getAlleles() ) { - if ( ! hasAllele(gAllele) && gAllele.isCalled() ) - throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles); - } - } - } - - - - // --------------------------------------------------------------------------------------------------------- - // - // utility routines - // - // --------------------------------------------------------------------------------------------------------- - - // the indel base that gets stripped off for indels - public boolean hasReferenceBaseForIndel() { - return hasAttribute(REFERENCE_BASE_FOR_INDEL_KEY); - } - - // the indel base that gets stripped off for indels - public byte getReferenceBaseForIndel() { - return hasReferenceBaseForIndel() ? (Byte)getAttribute(REFERENCE_BASE_FOR_INDEL_KEY) : (byte)'N'; - } - - private void determineType() { - if ( type == null ) { - switch ( getNAlleles() ) { - case 0: - throw new StingException("Unexpected requested type of VariantContext with no alleles!" + this); - case 1: - type = Type.NO_VARIATION; - // note that this doesn't require a reference allele. You can be monomorphic independent of having a - // reference allele - break; - default: - if ( isMNPAllele(alleles, 1) ) { - type = Type.SNP; - } else if ( isMNPAllele(alleles, -1) ) { - type = Type.MNP; - } else if ( isDIPAllele(alleles) ) { - type = Type.INDEL; - } else { - type = Type.MIXED; - } - } - } - } - - private static boolean isMNPAllele(Set alleles, int requiredLength ) { // requireLength == -1 if you don't care -// if ( alleles.size() < 2 ) -// return false; - - int l = requiredLength; - for ( Allele allele : alleles ) { - if ( l == -1 ) // remember the length of the first allele - l = allele.length(); - - if ( allele.length() != l ) - return false; - } - - return true; - } - - private static boolean isDIPAllele(Set alleles) { - if ( alleles.size() != 2 ) - return false; - - Iterator it = alleles.iterator(); - Allele a1 = it.next(); - Allele a2 = it.next(); - return a1.length() != a2.length(); - } - - public String toString() { - return String.format("[VC %s @ %s of type=%s alleles=%s attr=%s GT=%s", - getName(), getLocation(), this.getType(), - Utils.sorted(this.getAlleles()), Utils.sortedString(this.getAttributes()), this.getGenotypesSortedByName()); - } - - // protected basic manipulation routines - private static Set alleleCollectionToSet(Set dest, Collection alleles) { - for ( Allele a : alleles ) { - for ( Allele b : dest ) { - if ( a.basesMatch(b) ) - throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a); - } - - dest.add(a); - } - - return dest; - } - - private static Map genotypeCollectionToMap(Map dest, Collection genotypes) { - for ( Genotype g : genotypes ) { - if ( dest.containsKey(g.getSampleName() ) ) - throw new IllegalArgumentException("Duplicate genotype added to VariantContext: " + g); - dest.put(g.getSampleName(), g); - } - - return dest; - } - - // --------------------------------------------------------------------------------------------------------- - // - // tribble integration routines -- not for public consumption - // - // --------------------------------------------------------------------------------------------------------- - @Override - public String getChr() { - return getLocation().getContig(); - } - - @Override - public int getStart() { - return (int)getLocation().getStart(); - } - - @Override - public int getEnd() { - return (int)getLocation().getStop(); - } - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 24976fbc0..a47f75c37 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -26,16 +26,59 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext; import java.io.Serializable; import java.util.*; import org.apache.commons.jexl2.*; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; +import org.broad.tribble.util.variantcontext.*; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation; import org.broad.tribble.vcf.VCFConstants; public class VariantContextUtils { final public static JexlEngine engine = new JexlEngine(); + /** + * Create a new VariantContext + * + * @param name name + * @param loc location + * @param alleles alleles + * @param genotypes genotypes set + * @param negLog10PError qual + * @param filters filters: use null for unfiltered and empty set for passes filters + * @param attributes attributes + */ + public static VariantContext toVC(String name, GenomeLoc loc, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { + return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes != null ? VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes); + } + + /** + * Create a new variant context without genotypes and no Perror, no filters, and no attributes + * @param name name + * @param loc location + * @param alleles alleles + */ + public static VariantContext toVC(String name, GenomeLoc loc, Collection alleles) { + return new VariantContext (name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, VariantContext.NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); + } + + /** + * Create a new variant context without genotypes and no Perror, no filters, and no attributes + * @param name name + * @param loc location + * @param alleles alleles + * @param genotypes genotypes + */ + public static VariantContext toVC(String name, GenomeLoc loc, Collection alleles, Collection genotypes) { + return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); + } + + /** + * Copy constructor + * + * @param other the VariantContext to copy + */ + public static VariantContext toVC(VariantContext other) { + return new VariantContext(other.getName(), other.getChr(), other.getStart(), other.getEnd(), other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.getFilters(), other.getAttributes()); + } + /** * A simple but common wrapper for matching VariantContext objects using JEXL expressions */ @@ -230,7 +273,7 @@ public class VariantContextUtils { // establish the baseline info from the first VC VariantContext first = VCs.get(0); String name = first.getName(); - GenomeLoc loc = first.getLocation(); + GenomeLoc loc = getLocation(first); Set alleles = new TreeSet(); Map genotypes = new TreeMap(); @@ -250,11 +293,11 @@ public class VariantContextUtils { // cycle through and add info from the other VCs, making sure the loc/reference matches for ( VariantContext vc : VCs ) { - if ( loc.getStart() != vc.getLocation().getStart() ) // || !first.getReference().equals(vc.getReference()) ) + if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) ) throw new StingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); - if ( vc.getLocation().size() > loc.size() ) - loc = vc.getLocation(); // get the longest location + if ( getLocation(vc).size() > loc.size() ) + loc = getLocation(vc); // get the longest location nFiltered += vc.isFiltered() ? 1 : 0; nVariant += vc.isVariant() ? 1 : 0; @@ -331,7 +374,7 @@ public class VariantContextUtils { if ( rsID != null ) attributes.put(VariantContext.ID_KEY, rsID); - VariantContext merged = new VariantContext(name, loc, alleles, genotypes, negLog10PError, filters, attributes); + VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, attributes); if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged); return merged; } @@ -483,7 +526,7 @@ public class VariantContextUtils { g.getFilters(),g.getAttributes(),g.genotypesArePhased())); } - return new VariantContext(inputVC.getName(), inputVC.getLocation(), alleles, genotypes, inputVC.getNegLog10PError(), + return new VariantContext(inputVC.getName(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.getFilters(), attributes); } @@ -500,7 +543,7 @@ public class VariantContextUtils { boolean padVC; // We need to pad a VC with a common base if the reference allele length is less than the vc location span. - long locLength = inputVC.getLocation().size(); + long locLength = getLocation(inputVC).size(); if (refAllele.length() == locLength) padVC = false; else if (refAllele.length() == locLength-1) @@ -552,7 +595,7 @@ public class VariantContextUtils { g.getFilters(),g.getAttributes(),g.genotypesArePhased())); } - return new VariantContext(inputVC.getName(), inputVC.getLocation(), alleles, genotypes, inputVC.getNegLog10PError(), + return new VariantContext(inputVC.getName(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVC.getFilters(), attributes); @@ -616,19 +659,19 @@ public class VariantContextUtils { } public static VariantContext modifyGenotypes(VariantContext vc, Map genotypes) { - return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes()); + return new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes()); } public static VariantContext modifyLocation(VariantContext vc, GenomeLoc loc) { - return new VariantContext(vc.getName(), loc, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes()); + return new VariantContext(vc.getName(), loc.getContig(), loc.getStart(), loc.getStop(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes()); } public static VariantContext modifyFilters(VariantContext vc, Set filters) { - return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, vc.getAttributes()); + return new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), filters, vc.getAttributes()); } public static VariantContext modifyAttributes(VariantContext vc, Map attributes) { - return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes); + return new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes); } public static Genotype modifyName(Genotype g, String name) { @@ -655,4 +698,34 @@ public class VariantContextUtils { return VariantContextUtils.modifyGenotypes(vc, newGenotypes); } + + public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) { + if (!context.isSNP() || !context.isBiallelic()) + throw new IllegalStateException("Requested SNP substitution type for bialleic non-SNP " + context); + return BaseUtils.SNPSubstitutionType(context.getReference().getBases()[0], context.getAlternateAllele(0).getBases()[0]); + } + + /** + * If this is a BiAlleic SNP, is it a transition? + */ + public static boolean isTransition(VariantContext context) { + return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSITION; + } + + /** + * If this is a BiAlleic SNP, is it a transversion? + */ + public static boolean isTransversion(VariantContext context) { + return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION; + } + + /** + * create a genome location, given a variant context + * @param vc the variant context + * @return the genomeLoc + */ + public static final GenomeLoc getLocation(VariantContext vc) { + return GenomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd()); + } + } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java index 6aa74887b..50728ec81 100644 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContext.java @@ -25,7 +25,8 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext; import org.apache.commons.jexl2.JexlContext; import org.apache.commons.jexl2.MapContext; -//import org.apache.commons.jexl2.JexlHelper; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import org.broad.tribble.vcf.VCFConstants; @@ -58,8 +59,8 @@ class VariantJEXLContext implements JexlContext { private static Map x = new HashMap(); static { - x.put("CHROM", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getLocation().getContig(); }}); - x.put("POS", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getLocation().getStart(); }}); + x.put("CHROM", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getChr(); }}); + x.put("POS", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getStart(); }}); x.put("TYPE", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getType().toString(); }}); x.put("QUAL", new AttributeGetter() { public Object get(VariantContext vc) { return 10 * vc.getNegLog10PError(); }}); x.put("ALLELES", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getAlleles(); }}); @@ -164,8 +165,8 @@ class JEXLMap implements Map { if ( vc != null ) { // create a mapping of what we know about the variant context, its Chromosome, positions, etc. - infoMap.put("CHROM", vc.getLocation().getContig()); - infoMap.put("POS", String.valueOf(vc.getLocation().getStart())); + infoMap.put("CHROM", VariantContextUtils.getLocation(vc).getContig()); + infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(vc).getStart())); infoMap.put("TYPE", vc.getType().toString()); infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual())); diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java index c635cd0fc..46d2f2479 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -28,8 +28,8 @@ package org.broadinstitute.sting.gatk.io.storage; import java.io.*; import java.util.Set; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java index d966aa1f5..4d2ace45c 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java @@ -28,9 +28,9 @@ package org.broadinstitute.sting.gatk.io.stubs; import java.io.File; import java.io.PrintStream; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import net.sf.samtools.SAMFileHeader; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/HapMapROD.java b/java/src/org/broadinstitute/sting/gatk/refdata/HapMapROD.java deleted file mode 100755 index 08570a86e..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/HapMapROD.java +++ /dev/null @@ -1,36 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import java.util.*; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - -public class HapMapROD extends TabularROD -{ - public HapMapROD(final String name) { - super(name); - } - - public GenomeLoc getLocation() { - // For converting from Hg18 to b36 format: - // return GenomeLocParser.createGenomeLoc(this.get("chrom").replaceAll("chr", ""), Long.parseLong(this.get("pos"))); - return GenomeLocParser.createGenomeLoc(this.get("chrom"), Long.parseLong(this.get("pos"))); - } - - public String[] getSampleIDs() { - ArrayList header = getHeader(); - String[] sample_ids = new String[header.size()-11]; - for (int i = 11; i < header.size(); i++) - sample_ids[i-11] = header.get(i); - return sample_ids; - } - - public String[] getGenotypes() { - ArrayList header = getHeader(); - String[] genotypes = new String[header.size()-11]; - for (int i = 11; i < header.size(); i++) - genotypes[i-11] = get(header.get(i)); - return genotypes; - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java index 2dd3e85e1..43bd8c4fd 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/PlinkRod.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.gatk.refdata; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; +import org.broad.tribble.util.variantcontext.Allele; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java index e079bcac7..4efd34cca 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RefMetaDataTracker.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 1bbe3b87c..6fbaf0e77 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -4,12 +4,14 @@ import edu.mit.broad.picard.genotype.DiploidGenotype; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.gelitext.GeliTextFeature; +import org.broad.tribble.hapmap.HapMapFeature; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.MutableGenotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.genotype.CalledGenotype; @@ -42,9 +44,8 @@ public class VariantContextAdaptors { static { adaptors.put(DbSNPFeature.class, new DBSnpAdaptor()); - adaptors.put(VCFRecord.class, new VCFRecordAdaptor()); adaptors.put(PlinkRod.class, new PlinkRodAdaptor()); - adaptors.put(HapMapROD.class, new HapMapAdaptor()); + adaptors.put(HapMapFeature.class, new HapMapAdaptor()); adaptors.put(GeliTextFeature.class, new GeliTextAdaptor()); adaptors.put(rodGELI.class, new GeliAdaptor()); adaptors.put(VariantContext.class, new VariantContextAdaptor()); @@ -110,128 +111,14 @@ public class VariantContextAdaptors { Map attributes = new HashMap(); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); Collection genotypes = null; - VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(dbsnp.getChr(),dbsnp.getStart(),dbsnp.getEnd()), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); + VariantContext vc = new VariantContext(name, dbsnp.getChr(),dbsnp.getStart(),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes); return vc; } else return null; // can't handle anything else } } - private static class VCFRecordAdaptor extends VCAdaptor { - VariantContext convert(String name, Object input, ReferenceContext ref) { - return vcfToVariantContext(name, (VCFRecord)input, ref); - } - } - private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, ReferenceContext ref) { - if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) { - // add the reference allele - if ( ! Allele.acceptableAlleleBases(vcf.getReference(),true) ) { - System.out.printf("Excluding vcf record %s%n", vcf); - return null; - } - - Set filters = vcf.isFiltered() ? new HashSet(Arrays.asList(vcf.getFilteringCodes())) : null; - Map attributes = new HashMap(vcf.getInfoValues()); - attributes.put(VariantContext.ID_KEY, vcf.getID()); - - // add all of the alt alleles - List alleles = new ArrayList(); - Allele refAllele = determineRefAllele(vcf, ref); - alleles.add(refAllele); - - for ( VCFGenotypeEncoding alt : vcf.getAlternateAlleles() ) { - if ( ! Allele.acceptableAlleleBases(alt.getBases(),false) ) { - //System.out.printf("Excluding vcf record %s%n", vcf); - return null; - } - - Allele allele; - // special case: semi-deletion - if ( vcf.isDeletion() && refAllele.length() > alt.getLength() ) { - byte[] semiDeletion = new byte[refAllele.length() - alt.getLength()]; - System.arraycopy(ref.getBases(), alt.getLength(), semiDeletion, 0, refAllele.length() - alt.getLength()); - allele = Allele.create(new String(semiDeletion), false); - } else { - allele = Allele.create(alt.getBases(), false); - } - if ( ! allele.isNoCall() ) - alleles.add(allele); - } - - Map genotypes = new HashMap(); - for ( VCFGenotypeRecord vcfG : vcf.getVCFGenotypeRecords() ) { - List genotypeAlleles = new ArrayList(); - for ( VCFGenotypeEncoding s : vcfG.getAlleles() ) { - Allele a = Allele.getMatchingAllele(alleles, s.getBases()); - if ( a == null ) { - if ( vcf.isIndel() ) - genotypeAlleles.add(refAllele); - else - throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf); - } else { - genotypeAlleles.add(a); - } - } - - Map fields = new HashMap(); - for ( Map.Entry e : vcfG.getFields().entrySet() ) { - // todo -- fixme if we put GQ and FT into key itself - if ( ! e.getKey().equals(VCFConstants.GENOTYPE_QUALITY_KEY) && ! e.getKey().equals(VCFConstants.GENOTYPE_FILTER_KEY) ) - fields.put(e.getKey(), e.getValue()); - } - - Set genotypeFilters = new HashSet(); - if ( vcfG.isFiltered() ) // setup the genotype filter fields - genotypeFilters.addAll(Arrays.asList(vcfG.getFields().get(VCFConstants.GENOTYPE_FILTER_KEY).split(";"))); - - double qual = vcfG.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError(); - Genotype g = new Genotype(vcfG.getSampleName(), genotypeAlleles, qual, genotypeFilters, fields, vcfG.getPhaseType() == VCFGenotypeRecord.PHASE.PHASED); - genotypes.put(g.getSampleName(), g); - } - - double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError(); - - GenomeLoc loc = GenomeLocParser.createGenomeLoc(vcf.getChr(),vcf.getStart()); - if ( vcf.isDeletion() ) - loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1); - - VariantContext vc = new VariantContext(name, loc, alleles, genotypes, qual, filters, attributes); - return vc; - } else - return null; // can't handle anything else - } - - private static Allele determineRefAllele(VCFRecord vcf, ReferenceContext ref) { - if ( ref == null ) - throw new StingException("Illegal determineRefAllele call!"); - - Allele refAllele; - if ( vcf.isInsertion() ) { - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); -// } else if ( ref == null ) { -// refAllele = Allele.create(vcf.getReference(), true); - } else if ( !vcf.isIndel() ) { - refAllele = Allele.create(ref.getBase(), true); - if ( (char)ref.getBase() != vcf.getReference().charAt(0) ) - throw new StingException("The VCF reference base (" + vcf.getReference().charAt(0) + ") doesn't match the actual reference base (" + (char)ref.getBase() + "); please check that you are using the appropriate reference file"); - } else if ( vcf.isDeletion() ) { - int start = vcf.getPosition() - (int)ref.getWindow().getStart() + 1; - int delLength = 0; - for ( VCFGenotypeEncoding enc : vcf.getAlternateAlleles() ) { - if ( enc.getLength() > delLength ) - delLength = enc.getLength(); - } - if ( delLength > ref.getWindow().getStop() - vcf.getPosition() ) - throw new IllegalArgumentException("Length of deletion is larger than reference context provided at " + ref.getLocus()); - - refAllele = deletionAllele(ref, start, delLength); - } else { - throw new UnsupportedOperationException("Conversion of VCF type " + vcf.getType() + " is not supported."); - } - - return refAllele; - } private static Allele deletionAllele(ReferenceContext ref, int start, int len) { byte[] deletion = new byte[len]; @@ -314,7 +201,7 @@ public class VariantContextAdaptors { // create the variant context try { GenomeLoc loc = GenomeLocParser.setStop(plink.getLocation(), plink.getLocation().getStop() + plink.getLength()-1); - VariantContext vc = new VariantContext(plink.getVariantName(), loc, VCAlleles, genotypes); + VariantContext vc = VariantContextUtils.toVC(plink.getVariantName(), loc, VCAlleles, genotypes); return vc; } catch (IllegalArgumentException e) { throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file"); @@ -399,7 +286,7 @@ public class VariantContextAdaptors { // add the call to the genotype list, and then use this list to create a VariantContext genotypes.add(call); alleles.add(refAllele); - VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes); + VariantContext vc = VariantContextUtils.toVC(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes); return vc; } else return null; // can't handle anything else @@ -472,7 +359,7 @@ public class VariantContextAdaptors { // add the call to the genotype list, and then use this list to create a VariantContext genotypes.add(call); - VariantContext vc = new VariantContext(name, ((rodGELI) input).getLocation(), alleles, genotypes, geli.getBestToReferenceLod(), null, attributes); + VariantContext vc = VariantContextUtils.toVC(name, ((rodGELI) input).getLocation(), alleles, genotypes, geli.getBestToReferenceLod(), null, attributes); return vc; } @@ -506,7 +393,7 @@ public class VariantContextAdaptors { if ( ref == null ) throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context"); - HapMapROD hapmap = (HapMapROD)input; + HapMapFeature hapmap = (HapMapFeature)input; // add the reference allele HashSet alleles = new HashSet(); @@ -539,7 +426,7 @@ public class VariantContextAdaptors { genotypes.put(samples[i], g); } - VariantContext vc = new VariantContext(name, hapmap.getLocation(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, new HashMap()); + VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), hapmap.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, new HashMap()); return vc; } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java index ee1dc9154..dd946287d 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleFeature.java @@ -27,15 +27,11 @@ package org.broadinstitute.sting.gatk.refdata.features.beagle; import org.broad.tribble.Feature; -import org.broad.tribble.Feature; - import java.util.ArrayList; -import java.util.List; import java.util.Map; import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.utils.GenomeLoc; +import org.broad.tribble.util.variantcontext.Allele; public class BeagleFeature implements Feature { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java deleted file mode 100644 index abb6ca1ac..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java +++ /dev/null @@ -1,602 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata.features.vcf4; - -import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; -import org.broad.tribble.exception.CodecLineParsingException; -import org.broad.tribble.readers.LineReader; -import org.broad.tribble.util.ParsingUtils; -import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.collections.Pair; - -import java.io.IOException; -import java.util.*; - - -/** - * a feature codec for the VCF 4 specification. Our aim is to read in the records and convert to VariantContext as - * quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. - */ -public class VCF4Codec implements FeatureCodec, NameAwareCodec { - - - // we have to store the list of strings that make up the header until they're needed - private VCFHeader header = null; - - private VCFHeaderVersion version = VCFHeaderVersion.VCF4_0; - // used to convert the index of the alternate allele in genotypes to a integer index - private static int ZERO_CHAR = (byte)'0'; - - // a mapping of the allele - private static Map> alleleMap = new HashMap>(3); - - // cache the genotyope values - private static String[] GTValueArray = new String[100]; - - // for performance testing purposes - public static boolean validate = true; - - // a key optimization -- we need a per thread string parts array, so we don't allocate a big array over and over - // todo: make this thread safe? - private String[] parts = null; - - // for performance we cache the hashmap of filter encodings for quick lookup - private HashMap> filterHash = new HashMap>(); - - // a set of the genotype keys? - private String[] genotypeKeyArray = new String[100]; - - // a mapping of the VCF fields to their type, filter fields, and format fields, for quick lookup to validate against - TreeMap infoFields = new TreeMap(); - TreeMap formatFields = new TreeMap(); - ArrayList filterFields = new ArrayList(); - - // do we want to validate the info, format, and filter fields - private final boolean validateFromHeader = false; - - // we store a name to give to each of the variant contexts we emit - private String name = "Unknown"; - - private int lineNo = 0; - - // some classes need to transform the line before - private LineTransform transformer = null; - - /** - * @param reader the line reader to take header lines from - * @return the number of header lines - */ - @Override - public Object readHeader(LineReader reader) { - List headerStrings = new ArrayList(); - - String line; - try { - boolean foundHeaderVersion = false; - while ((line = reader.readLine()) != null) { - lineNo++; - if (line.startsWith(VCFHeader.METADATA_INDICATOR)) { - String[] lineFields = line.substring(2).split("="); - if (lineFields.length == 2 && - VCFHeaderVersion.isVersionString(lineFields[1]) && VCFHeaderVersion.isFormatString(lineFields[0])) { - foundHeaderVersion = true; - this.version = VCFHeaderVersion.toHeaderVersion(lineFields[1]); - } - headerStrings.add(line); - } - else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) { - if (!foundHeaderVersion) { - throw new CodecLineParsingException("We never saw a header line specifying VCF version"); - } - return createHeader(headerStrings, line); - } - else { - throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file"); - } - - } - } catch (IOException e) { - throw new RuntimeException("IO Exception ", e); - } - throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file"); - - } - - /** - * create a VCF header - * @param headerStrings a list of strings that represent all the ## entries - * @param line the single # line (column names) - * @return the count of header lines - */ - public Object createHeader(List headerStrings, String line) { - headerStrings.add(line); - header = VCFReaderUtils.createHeader(headerStrings, this.version); - - // setup our look-up lists for validation - for ( VCFHeaderLine hl : header.getMetaData() ) { - if ( hl instanceof VCFFilterHeaderLine ) - this.filterFields.add(((VCFFilterHeaderLine)hl).getName()); - if ( hl instanceof VCFFormatHeaderLine ) - this.formatFields.put(((VCFFormatHeaderLine)hl).getName(), ((VCFFormatHeaderLine)hl).getType()); - if ( hl instanceof VCFInfoHeaderLine ) - this.infoFields.put(((VCFInfoHeaderLine)hl).getName(), ((VCFInfoHeaderLine)hl).getType()); - } - // sort the lists so we can binary search them later on - Collections.sort(filterFields); - - return header; - } - - /** - * the fast decode function - * @param line the line of text for the record - * @return a feature, (not guaranteed complete) that has the correct start and stop - */ - public Feature decodeLoc(String line) { - return reallyDecode(line, false); - } - - /** - * decode the line into a feature (VariantContext) - * @param line the line - * @return a VariantContext - */ - public Feature decode(String line) { - return reallyDecode(line, true); - } - - private Feature reallyDecode(String line, boolean parseGenotypes) { - // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line - if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; - - if (parts == null) - parts = new String[header.getColumnCount()]; - - int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR.charAt(0)); - - // our header cannot be null, we need the genotype sample names and counts - if (header == null) throw new IllegalStateException("VCF Header cannot be null"); - - // check to make sure the split resulted in the correct number of fields (8 + (1 + genotytpe counts if it has genotypes) - if (nParts != header.getColumnCount()) - throw new IllegalArgumentException("we expected " + header.getColumnCount() + " columns and we got " + nParts + " for line " + line); - - return parseVCFLine(parts, parseGenotypes); - } - - /** - * create a an allele from an index and an array of alleles - * @param index the index - * @param alleles the alleles - * @return an Allele - */ - private static Allele oneAllele(char index, List alleles) { - if ( index == VCFConstants.EMPTY_ALLELE.charAt(0) ) - return Allele.NO_CALL; - int i = ((byte)index) - ZERO_CHAR; - return alleles.get(i); - } - - - /** - * parse genotype alleles from the genotype string - * @param GT GT string - * @param alleles list of possible alleles - * @param cache cache of alleles for GT - * @return the allele list for the GT string - */ - private List parseGenotypeAlleles(String GT, List alleles, Map> cache) { - // this should cache results [since they are immutable] and return a single object for each genotype - if ( GT.length() != 3 && GT.length() != 1 ) - throw new VCFParserException("Unreasonable number of alleles: " + "GT=" + GT + " length=" + GT.length()); // 0/1 => barf on 10/0 - - List GTAlleles = cache.get(GT); - - if ( GTAlleles == null ) { - Allele allele1 = oneAllele(GT.charAt(0), alleles); - GTAlleles = GT.length() == 3 ? Arrays.asList(allele1, oneAllele(GT.charAt(2), alleles)) : Arrays.asList(allele1); - cache.put(GT, GTAlleles); - } - - return GTAlleles; - } - - /** - * parse out the info fields - * @param infoField the fields - * @param id the indentifier - * @return a mapping of keys to objects - */ - private Map parseInfo(String infoField, String id) { - Map attributes = new HashMap(); - - if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) { - for ( String field : Utils.split(infoField, VCFConstants.INFO_FIELD_SEPARATOR) ) { - String key; - Object value; - - int eqI = field.indexOf("="); - if ( eqI != -1 ) { - key = field.substring(0, eqI); - String str = field.substring(eqI+1, field.length()); - - // lets see if the string contains a , separator - if ( str.contains(",") ) - value = Arrays.asList(str.split(",")); - else - value = str; - } else { - key = field; - value = new Boolean(true); - } - - attributes.put(key, value); - } - } - // validate the fields - validateFields(attributes.keySet(), new ArrayList(infoFields.keySet())); - - attributes.put(VariantContext.ID_KEY, id); - return attributes; - } - - /** - * validate the attributes against the stored fields of the appopriate type - * @param attributes the list of fields to check for inclusion against the field array - * @param fields the master list; all attributes must be in this list to validate - */ - private void validateFields(Set attributes, List fields) { - // validate the info fields - if (validateFromHeader) { - for (String attr : attributes) - if (Collections.binarySearch(fields,attr) < 0) - throw new VCFParserException("Unable to find field describing attribute " + attr); - } - } - - /** - * parse out the qual value - * @param qualString the quality string - * @return return a double - */ - private Double parseQual(String qualString) { - if ( qualString.equals(VCFConstants.MISSING_VALUE_v4) || qualString.equals(VCFConstants.MISSING_QUALITY_v3) ) - return VariantContext.NO_NEG_LOG_10PERROR; - return Double.valueOf(qualString) / 10.0; - } - - /** - * parse out the alleles - * @param ref the reference base - * @param alts a string of alternates to break into alleles - * @return a list of alleles, and a pair of the shortest and longest sequence - */ - private List parseAlleles(String ref, String alts) { - List alleles = new ArrayList(2); // we are almost always biallelic - // ref - if (!checkAllele(ref, true)) - throw new VCFParserException("Unable to parse out correct reference allele, we saw = " + ref); - Allele refAllele = Allele.create(ref, true); - alleles.add(refAllele); - - if ( alts.indexOf(",") == -1 ) // only 1 alternatives, don't call string split - parseSingleAllele(alleles, alts, false); - else - for ( String alt : Utils.split(alts, ",") ) - parseSingleAllele(alleles, alt, false); - - return alleles; - } - - /** - * check to make sure the allele is an acceptable allele - * @param allele the allele to check - * @param isRef are we the reference allele? - * @return true if the allele is fine, false otherwise - */ - private boolean checkAllele(String allele,boolean isRef) { - if (allele.contains("<")) { - Utils.warnUser("We are currently unable to parse out CNV encodings in VCF, we saw the following allele = " + allele); - return false; - } - else { - // check for VCF3.3 insertions or deletions - if (this.version != VCFHeaderVersion.VCF4_0) { - if ((allele.toUpperCase().charAt(0) == 'D') || (allele.toUpperCase().charAt(0) == 'D')) - throw new VCFParserException("Insertions/Deletions are not supported when reading 3.x VCF's. Please" + - " convert your file to VCF 4.0 using VCFTools, available at http://vcftools.sourceforge.net/index.html"); - } - - if ( ! Allele.acceptableAlleleBases(allele,isRef) ) - throw new VCFParserException("Unparsable vcf record with allele " + allele); - } - return true; - } - - /** - * parse a single allele, given the allele list - * @param alleles the alleles available - * @param alt the allele to parse - * @param isRef are we the reference allele? - */ - private void parseSingleAllele(List alleles, String alt, boolean isRef) { - if (!checkAllele(alt,isRef)) - throw new VCFParserException("Unable to parse out correct alt allele, we saw = " + alt); - - Allele allele = Allele.create(alt, false); - if ( ! allele.isNoCall() ) - alleles.add(allele); - } - - /** - * parse the filter string, first checking to see if we already have parsed it in a previous attempt - * @param filterString the string to parse - * @return a set of the filters applied - */ - private Set parseFilters(String filterString) { - - // null for unfiltered - if ( filterString.equals(VCFConstants.UNFILTERED) ) - return null; - - // empty set for passes filters - LinkedHashSet fFields = new LinkedHashSet(); - - if ( this.version == VCFHeaderVersion.VCF4_0 ) { - if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) - return fFields; - if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) ) - throw new StingException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4.0"); - } else if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) ) { - return fFields; - } - - // do we have the filter string cached? - if ( filterHash.containsKey(filterString) ) - return filterHash.get(filterString); - - // otherwise we have to parse and cache the value - if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 ) - fFields.add(filterString); - else - fFields.addAll(Utils.split(filterString, VCFConstants.FILTER_CODE_SEPARATOR)); - - filterHash.put(filterString, fFields); - - validateFields(fFields, filterFields); - return fFields; - } - - /** - * parse out the VCF line - * - * @param parts the parts split up - * @param parseGenotypes whether to parse genotypes or not - * @return a variant context object - */ - private VariantContext parseVCFLine(String[] parts, boolean parseGenotypes) { -// try { - // increment the line count - lineNo++; - - // parse out the required fields - String contig = parts[0]; - long pos = Long.valueOf(parts[1]); - String id = parts[2]; - String ref = parts[3].toUpperCase(); - String alts = parts[4].toUpperCase(); - Double qual = parseQual(parts[5]); - String filter = parts[6]; - String info = parts[7]; - - // get our alleles, filters, and setup an attribute map - List alleles = parseAlleles(ref, alts); - Set filters = parseFilters(filter); - Map attributes = parseInfo(info, id); - - // find out our current location, and clip the alleles down to their minimum length - Pair> locAndAlleles; - if ( !isSingleNucleotideEvent(alleles) ) { - locAndAlleles = clipAlleles(contig, pos, ref, alleles); - } else { - locAndAlleles = new Pair>(GenomeLocParser.createGenomeLoc(contig, pos), alleles); - } - - // a map to store our genotypes - Map genotypes = null; - - // do we have genotyping data - if (parts.length > 8 && parseGenotypes) { - genotypes = createGenotypeMap(parts, locAndAlleles, 8); - } - - VariantContext vc = new VariantContext(name, locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes); - - // Trim bases of all alleles if necessary - return VariantContextUtils.createVariantContextWithTrimmedAlleles(vc); - } - - private boolean isSingleNucleotideEvent(List alleles) { - for ( Allele a : alleles ) { - if ( a.length() > 1 ) - return false; - } - return true; - } - - class VCFParserException extends StingException { - public VCFParserException(String msg) { - super("Line " + lineNo + " generated parser exception " + msg); - } - - public VCFParserException(String msg, Throwable throwable) { - super("Line " + lineNo + " generated parser exception " + msg, throwable); - } - } - - /** - * create a genotype map - * @param parts the string parts - * @param locAndAlleles the locations and the list of alleles - * @param formatFieldLocation the position in the parts array that the genotype strings start - * @return a mapping of sample name to genotype object - */ - protected Map createGenotypeMap(String[] parts, Pair> locAndAlleles, int formatFieldLocation) { - Map genotypes = new LinkedHashMap(Math.max(parts.length - formatFieldLocation, 1)); - - // get the format keys - int nGTKeys = ParsingUtils.split(parts[formatFieldLocation], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR.charAt(0)); - - // cycle through the sample names - Iterator sampleNameIterator = header.getGenotypeSamples().iterator(); - - // clear out our allele mapping - alleleMap.clear(); - - // cycle through the genotype strings - for (int genotypeOffset = formatFieldLocation + 1; genotypeOffset < parts.length; genotypeOffset++) { - int GTValueSplitSize = ParsingUtils.split(parts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR.charAt(0)); - - double GTQual = VariantContext.NO_NEG_LOG_10PERROR; - Set genotypeFilters = null; - Map gtAttributes = null; - String sampleName = sampleNameIterator.next(); - - // check to see if the value list is longer than the key list, which is a problem - if (nGTKeys < GTValueSplitSize) - throw new VCFParserException("Too few keys for compared to the value string " + sampleName + ", keys = " + parts[8] + " values = " + parts[genotypeOffset]); - - int genotypeAlleleLocation = -1; - if (nGTKeys >= 1) { - gtAttributes = new HashMap(nGTKeys - 1); - for (int i = 0; i < nGTKeys; i++) { - if (i >= GTValueSplitSize) { - if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_QUALITY_KEY)) - GTQual = parseQual(VCFConstants.MISSING_VALUE_v4); - else if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_FILTER_KEY)) - genotypeFilters = parseFilters(VCFConstants.MISSING_VALUE_v4); - else - gtAttributes.put(genotypeKeyArray[i],VCFConstants.MISSING_VALUE_v4); - } - else if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_KEY)) - if (i != 0) - throw new VCFParserException("Saw GT at position " + i + ", it must be at the first position for genotypes. At location = " + locAndAlleles.first); - else - genotypeAlleleLocation = i; - else if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_QUALITY_KEY)) - GTQual = parseQual(GTValueArray[i]); - else if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_FILTER_KEY)) - genotypeFilters = parseFilters(GTValueArray[i]); - else { - if (this.version != VCFHeaderVersion.VCF4_0 && GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3)) - GTValueArray[i] = VCFConstants.MISSING_VALUE_v4; - gtAttributes.put(genotypeKeyArray[i], GTValueArray[i]); - } - } - // validate the format fields - validateFields(gtAttributes.keySet(), new ArrayList(formatFields.keySet())); - } - // check to make sure we found a gentoype field - if (genotypeAlleleLocation < 0) throw new VCFParserException("Unable to find required field GT for record " + locAndAlleles.first); - - // assuming allele list length in the single digits, could be bad. Check for > 1 for haploid genotypes - boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|'; - - // add it to the list - genotypes.put(sampleName, new Genotype(sampleName, - parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], locAndAlleles.second, alleleMap), - GTQual, - genotypeFilters, - gtAttributes, - phased)); - - } - return genotypes; - } - - /** - * clip the alleles, based on the reference - * - * @param contig our contig position - * @param position the unadjusted start position (pre-clipping) - * @param ref the reference string - * @param unclippedAlleles the list of unclipped alleles - * @return a list of alleles, clipped to the reference - */ - static Pair> clipAlleles(String contig, long position, String ref, List unclippedAlleles) { - List newAlleleList = new ArrayList(); - - // find the preceeding string common to all alleles and the reference - boolean clipping = true; - for (Allele a : unclippedAlleles) - if (a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0])) { - clipping = false; - } - int forwardClipping = (clipping) ? 1 : 0; - - int reverseClipped = 0; - clipping = true; - while (clipping) { - for (Allele a : unclippedAlleles) - if (a.length() - reverseClipped <= forwardClipping || a.length() - forwardClipping == 0) - clipping = false; - else if (a.getBases()[a.length()-reverseClipped-1] != ref.getBytes()[ref.length()-reverseClipped-1]) - clipping = false; - if (clipping) reverseClipped++; - } - - for (Allele a : unclippedAlleles) - newAlleleList.add(Allele.create(Arrays.copyOfRange(a.getBases(),forwardClipping,a.getBases().length-reverseClipped),a.isReference())); - - // the new reference length - int refLength = ref.length() - reverseClipped; - - return new Pair>(GenomeLocParser.createGenomeLoc(contig,position,(position+Math.max(refLength - 1,0))), - newAlleleList); - } - - /** - * - * @return the type of record - */ - @Override - public Class getFeatureType() { - return VariantContext.class; - } - - /** - * get the name of this codec - * @return our set name - */ - public String getName() { - return name; - } - - /** - * set the name of this codec - * @param name new name - */ - public void setName(String name) { - this.name = name; - } - - public static interface LineTransform { - public String lineTransform(String line); - } - - public LineTransform getTransformer() { - return transformer; - } - - public void setTransformer(LineTransform transformer) { - this.transformer = transformer; - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java index 9157f1bbb..dc5de4e20 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java @@ -55,7 +55,6 @@ public class RODTrackBuilder implements RMDTrackBuilder { // All known ROD types Types.put("GELI", rodGELI.class); Types.put("Table", TabularROD.class); - Types.put("HapMap", HapMapROD.class); Types.put("Intervals", IntervalRod.class); Types.put("Plink", PlinkRod.class); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java index 38f55b560..7a358db2d 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilder.java @@ -37,7 +37,6 @@ import org.broad.tribble.index.linear.LinearIndexCreator; import org.broad.tribble.source.BasicFeatureSource; import org.broad.tribble.util.LittleEndianInputStream; import org.broad.tribble.util.LittleEndianOutputStream; -import org.broad.tribble.vcf.NameAwareCodec; import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; @@ -70,6 +69,9 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen // what index to use static boolean useLinearIndex = true; + // our bin size + static int binSize = 1600; + // the linear index extension public static final String indexExtension = ".idx"; @@ -239,11 +241,11 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen Index index = IndexFactory.loadIndex(indexFile.getAbsolutePath()); // check if the file is up-to date (filestamp and version check) - if (index.isCurrentVersion() && indexFile.lastModified() > inputFile.lastModified()) + if (/*index.isCurrentVersion() && */ indexFile.lastModified() > inputFile.lastModified()) return index; else if (indexFile.lastModified() < inputFile.lastModified()) logger.warn("Index file " + indexFile + " is out of date (index older than input file), deleting and updating the index file"); - else // we've loaded an old version of the index, we want to remove it + else // we've loaded an old version of the index, we want to remove it <-- currently not used, but may re-enable logger.warn("Index file " + indexFile + " is out of date (old version), deleting and updating the index file"); // however we got here, remove the index and return null @@ -300,10 +302,13 @@ public class TribbleRMDTrackBuilder extends PluginManager implemen // this can take a while, let them know what we're doing logger.info("Creating Tribble index in memory for file " + inputFile); IndexCreator creator; - if (useLinearIndex) + if (useLinearIndex) { creator = new LinearIndexCreator(inputFile,codec,null); - else + ((LinearIndexCreator)creator).setBinWidth(binSize); + } else { creator = new IntervalIndexCreator(inputFile, codec, null); + ((IntervalIndexCreator)creator).setFeaturesPerInterval(binSize); + } return creator.createIndex(); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java index f2fb8ffd5..649ffeb24 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FeatureToGATKFeatureIterator.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.refdata.utils; import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.Feature; +import org.broad.tribble.iterators.CloseableTribbleIterator; import java.util.Iterator; @@ -38,10 +39,10 @@ import java.util.Iterator; * a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs) */ public class FeatureToGATKFeatureIterator implements CloseableIterator { - private final CloseableIterator iterator; + private final CloseableTribbleIterator iterator; private final String name; - public FeatureToGATKFeatureIterator(CloseableIterator iter, String name) { + public FeatureToGATKFeatureIterator(CloseableTribbleIterator iter, String name) { this.name = name; this.iterator = iter; } @@ -63,6 +64,6 @@ public class FeatureToGATKFeatureIterator implements CloseableIterator { throw new IllegalStateException("No rod data is present"); Object rod = rods.get(0); - if ( rod instanceof HapMapROD ) - samples.addAll(Arrays.asList(((HapMapROD)rod).getSampleIDs())); + if ( rod instanceof HapMapFeature) + samples.addAll(Arrays.asList(((HapMapFeature)rod).getSampleIDs())); else samples.addAll(vc.getSampleNames()); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Alignability.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Alignability.java index e01fd44db..bd52443b3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Alignability.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/Alignability.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.TabularROD; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index a86c34479..6cabbd34d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -25,11 +25,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java index 7ab8a0819..e9c654dd6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java @@ -1,7 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import java.util.Map; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 7cd8ee5fa..7114e7dbe 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -25,12 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index d7d9be1b0..9e5b515d3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 81c2c746a..fec480ca5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -1,9 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFFormatHeaderLine; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.pileup.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index a63a1f75d..da9d0ea0c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.BaseUtils; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index 878e2e317..dada26b6a 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 5705347c1..46058a826 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -1,10 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 2545e7717..ab0115160 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index f10b2f70b..851cc2a91 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index a5eaa5289..0441ec819 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index fdcd24fe9..fb3bd8427 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -1,10 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 47225e11a..0f03a2576 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 3f230e49e..68696fe04 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -1,8 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java index 8387825e6..1cf195636 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java @@ -1,12 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import java.util.Arrays; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index e40f7df8e..8351d718e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 77e84768f..2afc513bb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -25,13 +25,14 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeader; import org.broad.tribble.vcf.VCFHeaderLine; import org.broad.tribble.vcf.VCFCompoundHeaderLine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; @@ -222,7 +223,7 @@ public class VariantAnnotator extends RodWalker { vcfWriter.add(annotatedVC, ref.getBase()); } else { // check to see if the buffered context is different (in location) this context - if ( indelBufferContext != null && ! indelBufferContext.iterator().next().getLocation().equals(annotatedVCs.iterator().next().getLocation()) ) { + if ( indelBufferContext != null && ! VariantContextUtils.getLocation(indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(annotatedVCs.iterator().next())) ) { for ( VariantContext annotatedVC : indelBufferContext ) vcfWriter.add(annotatedVC, ref.getBase()); indelBufferContext = annotatedVCs; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index b9526be48..eb7ee719c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -38,12 +38,12 @@ import java.util.Set; import java.util.Map.Entry; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; @@ -252,7 +252,7 @@ public class VariantAnnotatorEngine { //Create a separate VariantContext (aka. output line) for each element in infoAnnotationOutputsList Collection returnValue = new LinkedList(); for(Map infoAnnotationOutput : infoAnnotationOutputsList) { - returnValue.add( new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, infoAnnotationOutput) ); + returnValue.add( new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, infoAnnotationOutput) ); } return returnValue; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java index 7c6618014..946780959 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import java.util.Map; import java.util.List; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index 3bd74b8c5..53444db01 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import java.util.Map; import java.util.List; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java index 4972bff93..305a04dae 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.gatk.walkers.fasta; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java index 7dfa6647a..07510b12b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.filters; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; public class ClusteredSnps { @@ -27,7 +28,7 @@ public class ClusteredSnps { continue; // find the nth variant - GenomeLoc left = variants[i].getVariantContext().getLocation(); + GenomeLoc left = VariantContextUtils.getLocation(variants[i].getVariantContext()); GenomeLoc right = null; int snpsSeen = 1; @@ -35,7 +36,7 @@ public class ClusteredSnps { while ( ++currentIndex < variants.length ) { if ( variants[currentIndex] != null && variants[currentIndex].getVariantContext() != null && variants[currentIndex].getVariantContext().isVariant() ) { if ( ++snpsSeen == snpThreshold ) { - right = variants[currentIndex].getVariantContext().getLocation(); + right = VariantContextUtils.getLocation(variants[currentIndex].getVariantContext()); break; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java index 6d0afd594..68be54097 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java @@ -25,8 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.filters; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 6a6678ee7..d6aaf5f3a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -25,11 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.filters; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -208,7 +208,7 @@ public class VariantFiltrationWalker extends RodWalker { filters.add(exp.name); } - VariantContext filteredVC = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes()); + VariantContext filteredVC = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes()); writeVCF(filteredVC, context.getReferenceContext().getBase()); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java index 3f65c0677..245418c71 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java @@ -24,10 +24,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/CreateTriggerTrack.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/CreateTriggerTrack.java index 4cef122dc..66313a029 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/CreateTriggerTrack.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/CreateTriggerTrack.java @@ -24,11 +24,11 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import java.util.Collection; import java.io.PrintWriter; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 300ff8a01..a29350f28 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; @@ -32,8 +34,6 @@ import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index a6f7b8b3a..d52fb2ff5 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import java.util.*; @@ -410,7 +412,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc attributes.put("SB", Double.valueOf(strandScore)); } - VariantContext vc = new VariantContext("UG_SNP_call", loc, alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes); + VariantContext vc = new VariantContext("UG_SNP_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes); return new VariantCallContext(vc, passesCallThreshold(phredScaledConfidence)); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java index 4c61f38c3..b4ad9b7c4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java @@ -1,11 +1,13 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import java.util.*; @@ -72,7 +74,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel { throw new StingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent); } - VariantContext vc = new VariantContext("UG_Indel_call", loc, alleles, new HashMap() /* genotypes */, + VariantContext vc = new VariantContext("UG_Indel_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, new HashMap() /* genotypes */, -1.0 /* log error */, null /* filters */, null /* attributes */); vcc = new VariantCallContext(vc,true); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 0b151c101..4de777755 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -147,7 +147,7 @@ public class UnifiedGenotyper extends LocusWalker { if ( knownIndel == null || !knownIndel.isIndel() ) continue; byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length()); - int start = (int)(knownIndel.getLocation().getStart() - leftmostIndex) + 1; + int start = (int)(knownIndel.getStart() - leftmostIndex) + 1; Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel.isDeletion()); if ( c != null ) altConsensesToPopulate.add(c); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 0e958fc01..fdf14675e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -25,9 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.indels; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.filters.Platform454Filter; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.filters.BadMateFilter; @@ -123,7 +124,7 @@ public class RealignerTargetCreator extends RodWalker { // we don't want to see the same multi-base deletion multiple times if ( positionOfLastVariant != null && positionOfLastVariant.size() > 1 && - positionOfLastVariant.equals(vc.getLocation()) ) + positionOfLastVariant.equals(VariantContextUtils.getLocation(vc)) ) return ""; - positionOfLastVariant = vc.getLocation(); + positionOfLastVariant = VariantContextUtils.getLocation(vc); String contig = context.getLocation().getContig(); long offset = context.getLocation().getStart(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java index db0759b4b..16fd5e6f6 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java @@ -25,11 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.sequenom; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.PlinkRod; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CompOverlap.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CompOverlap.java index f8d72fa6d..64d91bc7b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CompOverlap.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CompOverlap.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountFunctionalClasses.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountFunctionalClasses.java index ff319a7f2..63ea4062e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountFunctionalClasses.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountFunctionalClasses.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @Analysis(name = "Count Functional Classes", description = "Counts instances of different functional variant classes (provided the variants are annotated with that information)") diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountVariants.java index dd078c8ed..b1ed76497 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountVariants.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java index e66bac2a6..9acb4a67b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java @@ -1,8 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import org.broadinstitute.sting.playground.utils.report.utils.TableType; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/IndelLengthHistogram.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/IndelLengthHistogram.java index f264bfaaa..af43409ae 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/IndelLengthHistogram.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/IndelLengthHistogram.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/MendelianViolationEvaluator.java index d90af98db..3b138dd4d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/MendelianViolationEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/MendelianViolationEvaluator.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/SimpleMetricsByAC.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/SimpleMetricsByAC.java index bbc1c06de..610ab0f4b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/SimpleMetricsByAC.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/SimpleMetricsByAC.java @@ -1,8 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; @@ -58,7 +59,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval public MetricsAtAC(int ac) { this.ac = ac; } public void update(VariantContext eval) { - if ( eval.isTransition() ) + if ( VariantContextUtils.isTransition(eval) ) nTi++; else nTv++; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TiTvVariantEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TiTvVariantEvaluator.java index f1b2e6ee6..4c7d25a7e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TiTvVariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/TiTvVariantEvaluator.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; @@ -41,7 +42,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv public void updateTiTv(VariantContext vc, boolean updateStandard) { if (vc != null && vc.isSNP() && vc.isBiallelic()) { - if (vc.isTransition()) { + if (VariantContextUtils.isTransition(vc)) { if (updateStandard) nTiInComp++; else nTi++; } else { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 2d3f1cb68..306175ea0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -26,10 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; import org.apache.log4j.Logger; +import org.broad.tribble.util.variantcontext.MutableVariantContext; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvaluator.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvaluator.java index 493bf6f36..293ab67b2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvaluator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvaluator.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; /** * The Broad Institute diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantQualityScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantQualityScore.java index 4e2436b90..9b595ae3a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantQualityScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantQualityScore.java @@ -25,10 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; @@ -233,7 +234,7 @@ public class VariantQualityScore extends VariantEvaluator { if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) if( titvStats == null ) { titvStats = new TiTvStats(); } - titvStats.incrValue(eval.getPhredScaledQual(), eval.isTransition()); + titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval)); if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); } int alternateAlleleCount = 0; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java index 8e6850ad8..f8aab3842 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.RodWalker; @@ -194,7 +194,7 @@ public class ApplyVariantCuts extends RodWalker { if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) { Set filters = new HashSet(); filters.add(filterString); - vc = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), filters, vc.getAttributes()); + vc = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), filters, vc.getAttributes()); } } vcfWriter.add( vc, ref.getBase() ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java index 5e7bbfed9..e57fb4f52 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GenerateVariantClustersWalker.java @@ -26,9 +26,10 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; @@ -41,7 +42,6 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.commandline.Argument; import java.io.IOException; -import java.io.PrintStream; import java.util.*; /** @@ -164,7 +164,7 @@ public class GenerateVariantClustersWalker extends RodWalker maxAC ) { maxAC = variantDatum.alleleCount; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java index 8392718c2..09a4f9c9b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantGaussianMixtureModel.java @@ -26,7 +26,8 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.StingException; @@ -522,7 +523,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); } catch( Exception e ) { throw new StingException("No double value detected for annotation = " + annotationKey + - " in variant at " + vc.getLocation() + ", reported annotation value = " + vc.getAttribute( annotationKey ) ); + " in variant at " + VariantContextUtils.getLocation(vc) + ", reported annotation value = " + vc.getAttribute( annotationKey ) ); } } return value; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index b95a7cc79..820887615 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -26,10 +26,11 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.broad.tribble.dbsnp.DbSNPFeature; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; @@ -180,7 +181,7 @@ public class VariantRecalibrator extends RodWalker filters = new HashSet(); filters.add(VCFConstants.PASSES_FILTERS_v4); - VariantContext newVC = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.getGenotypes(), variantDatum.qual / 10.0, filters, attrs); + VariantContext newVC = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), variantDatum.qual / 10.0, filters, attrs); vcfWriter.add( newVC, ref.getBase() ); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index fb46189c2..8612c284e 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Reference; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index 9bef35d37..989f59752 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.SampleUtils; @@ -32,7 +33,6 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeader; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index d14270faf..26b20f856 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -24,9 +24,10 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; +import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -34,7 +35,6 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broad.tribble.vcf.VCFHeader; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java index 04a47ed36..017d44e68 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.DataSource; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AnnotationByAlleleFrequencyWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AnnotationByAlleleFrequencyWalker.java index f9779b736..f73a63dd6 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AnnotationByAlleleFrequencyWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AnnotationByAlleleFrequencyWalker.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BeagleOutputByDepthWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BeagleOutputByDepthWalker.java index e3a641a78..f911a8652 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BeagleOutputByDepthWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/BeagleOutputByDepthWalker.java @@ -25,20 +25,15 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; -import org.broad.tribble.vcf.VCFRecord; -import org.broad.tribble.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.gatk.walkers.RMD; -import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; @@ -138,7 +133,7 @@ public class BeagleOutputByDepthWalker extends RodWalker { Genotype compGenotype = compGenotypes.get(sample); - outputWriter.format("%d %d %d %d %d ", vc_postbgl.getLocation().getStart(), alleleCountH, chrCountH, + outputWriter.format("%d %d %d %d %d ", vc_postbgl.getStart(), alleleCountH, chrCountH, alleleCountEmp, chrCountEmp); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java index a7fc374a1..8d9da0535 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/CreateTiTvTrack.java @@ -1,17 +1,16 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.wiggle.WiggleHeader; import org.broadinstitute.sting.utils.wiggle.WiggleWriter; import java.util.ArrayList; -import java.util.EnumSet; -import java.util.HashSet; /** * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl @@ -50,9 +49,9 @@ public class CreateTiTvTrack extends RodWalker { return window; } - window.update(vc.isTransition()); + window.update(VariantContextUtils.isTransition(vc)); if ( window.getTiTv() != null ) { - writer.writeData(vc.getLocation(),window.getTiTv()); + writer.writeData(VariantContextUtils.getLocation(vc),window.getTiTv()); } return window; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java index eaa42b354..80d69fc9d 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.oneoffprojects.walkers; import org.broad.tribble.FeatureSource; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java index 3acbf3538..a9c803b4d 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelDBRateWalker.java @@ -1,11 +1,12 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeader; import org.broad.tribble.vcf.VCFHeaderLine; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.walkers.Reference; @@ -115,12 +116,12 @@ public class IndelDBRateWalker extends RodWalker public OverlapTable getOverlapTable(ReferenceContext ref) { // step 1: check that the eval queue is non-empty and that we are outside the window - if ( evalContexts.isEmpty() || evalContexts.get(0).getLocation().distance(ref.getLocus()) <= indelWindow ) { + if ( evalContexts.isEmpty() || VariantContextUtils.getLocation(evalContexts.get(0)).distance(ref.getLocus()) <= indelWindow ) { return null; } // step 2: discard all comp variations which come before the window - while ( ! compContexts.isEmpty() && compContexts.get(0).getLocation().isBefore(ref.getLocus()) && - compContexts.get(0).getLocation().distance(ref.getLocus()) > indelWindow) { + while ( ! compContexts.isEmpty() && VariantContextUtils.getLocation(compContexts.get(0)).isBefore(ref.getLocus()) && + VariantContextUtils.getLocation(compContexts.get(0)).distance(ref.getLocus()) > indelWindow) { compContexts.remove(0); } // step 3: see if there are any contexts left; if so then they must be within the window @@ -141,12 +142,12 @@ public class IndelDBRateWalker extends RodWalker public OverlapTable nonEmptyOverlapTable(ReferenceContext ref) { if ( vcfWriter != null ) { int i = 0; - while ( i < compContexts.size() && compContexts.get(i).getLocation().isBefore(evalContexts.get(0).getLocation())) { + while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).isBefore(VariantContextUtils.getLocation(evalContexts.get(0)))) { vcfWriter.add(compContexts.get(i),compContexts.get(i).getReference().getBases()[0]); i++; } vcfWriter.add(evalContexts.get(0), ref.getBase()); - while ( i < compContexts.size() && compContexts.get(i).getLocation().distance(evalContexts.get(0).getLocation()) <= indelWindow) { + while ( i < compContexts.size() && VariantContextUtils.getLocation(compContexts.get(i)).distance(VariantContextUtils.getLocation(evalContexts.get(0))) <= indelWindow) { vcfWriter.add(compContexts.get(i), compContexts.get(i).getReference().getBases()[0]); i++; } @@ -183,8 +184,8 @@ class OverlapTable { public void setDistances(List comps, VariantContext eval, int winsize) { distances = new ExpandingArrayList(); for ( VariantContext comp : comps ) { - if ( comp.getLocation().distance(eval.getLocation()) <= winsize ) { - distances.add(comp.getLocation().distance(eval.getLocation())); + if ( VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval)) <= winsize ) { + distances.add(VariantContextUtils.getLocation(comp).distance(VariantContextUtils.getLocation(eval))); } } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java index 9a699839f..b13d48783 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MendelianViolationClassifier.java @@ -1,14 +1,15 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeader; import org.broad.tribble.vcf.VCFHeaderLine; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; @@ -83,7 +84,7 @@ public class MendelianViolationClassifier extends LocusWalker brokenRegions = new ArrayList(3); // can only enter or break regions at unfiltered calls - for( Map.Entry memberGenotype : v.getUnderlyingGenotypes().entrySet() ) { + for( Map.Entry memberGenotype : v.getUnderlyingGenotypes().entrySet() ) { // for each family member if ( homozygousRegions.get(memberGenotype.getKey()) == null ) { // currently in a heterozygous region, update if possible @@ -202,7 +203,7 @@ public class MendelianViolationClassifier extends LocusWalker { public static final String ORIGINAL_CIGAR_TAG = "OC"; @@ -126,4 +127,4 @@ public class RealignedReadCounter extends ReadWalker { System.out.println(updatedIntervals + " intervals were updated"); System.out.println(updatedReads + " reads were updated"); } -} \ No newline at end of file +} diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java index 17c810f65..3e1e01e3d 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestVariantContextWalker.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.*; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.genotype.vcf.*; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4ReaderTestWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4ReaderTestWalker.java deleted file mode 100755 index bc306d8a0..000000000 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4ReaderTestWalker.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2010, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.oneoffprojects.walkers; - -import org.broad.tribble.readers.AsciiLineReader; -import org.broad.tribble.vcf.*; -import org.broad.tribble.util.ParsingUtils; -import org.broad.tribble.FeatureCodec; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; -import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec; -import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.sting.utils.genotype.vcf.*; -import org.broadinstitute.sting.commandline.Argument; - -import java.util.*; -import java.io.*; - -import com.sun.xml.internal.ws.wsdl.parser.ParserUtil; - -/** - * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl - * - * @Author chartl - * @Date Apr 13, 2010 - */ -public class VCF4ReaderTestWalker extends RodWalker { - @Argument(shortName="MR", doc="", required=false) - int maxRecords = -1; - @Argument(shortName="vcf", doc="", required=true) - File vcfFile = null; - @Argument(shortName="Parse", doc="", required=true) - ParsingStatus splitFile = ParsingStatus.NONE; - @Argument(shortName="DontValidate", doc="", required=false) - boolean DontValidate = false; - - @Argument(shortName="USE_VCF3", doc="", required=false) - boolean USE_VCF3 = false; - - - public enum ParsingStatus { NONE, SPLIT_LINES, VARIANTS, GENOTYPES } - - public void initialize() { - } - - public VCFRecord map(RefMetaDataTracker tracker, ReferenceContext context, AlignmentContext alicon) { - return null; - } - - public Long reduce(VCFRecord con, Long num) { - if ( con == null ) { - return num; - } - - return 1 + num; - } - - public Long reduceInit() { - return 0l; - } - - String[] parts = new String[10000]; - public void onTraversalDone(Long num){ - VCF4Codec vcf4codec = new VCF4Codec(); - //VCF4Codec.parseGenotypesToo = splitFile == ParsingStatus.GENOTYPES; - VCF4Codec.validate = ! DontValidate; - - VCFCodec vcf3codec = new VCFCodec(); - - FeatureCodec codec = USE_VCF3 ? vcf3codec : vcf4codec; - - try { - AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(vcfFile)); - VCFHeader header = (VCFHeader)codec.readHeader(lineReader); - out.printf("Read %d header lines%n", header.getMetaData().size()+1); - - // a counter of the number of lines we've read - int lineNumber = header.getMetaData().size()+1; - while (true) { - String line = lineReader.readLine(); - - if ( line == null ) - break; - - lineNumber++; - if ( lineNumber >= maxRecords && maxRecords != -1 ) { - return; - } - - if ( line.charAt(0) == '#' ) - continue; - - Object vc = null; - if ( splitFile == ParsingStatus.NONE ) { - - } - else if ( splitFile == ParsingStatus.SPLIT_LINES ) { - // todo -- look at header and determine number of elements that need to be parsed. Should be static per file - int nParts = ParsingUtils.split(line, parts, '\t'); - } else { - vc = codec.decode(line); - if ( USE_VCF3 ) { - VCFRecord rec = (VCFRecord)vc; - GenomeLoc loc = GenomeLocParser.createGenomeLoc(rec.getChr(), rec.getStart()); - ReferenceContext ref = new ReferenceContext(loc, (byte)rec.getReference().charAt(0)); - vc = VariantContextAdaptors.toVariantContext("X", vc, ref); - } - } - - if ( lineNumber % 10000 == 0 ) { - System.out.printf("%10d: %s%n", lineNumber, line.subSequence(0, 50)); - System.out.printf("%10d: %s%n", lineNumber, vc); - } - } - } catch ( FileNotFoundException e ) { - throw new StingException(e.getMessage()); - } catch ( IOException e ) { - throw new StingException(e.getMessage()); - } - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java index 46c5ff0cb..0a7642654 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java @@ -1,19 +1,17 @@ package org.broadinstitute.sting.oneoffprojects.walkers; import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/InsertSizeDistribution.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/InsertSizeDistribution.java index aad365972..011ef56df 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/InsertSizeDistribution.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/InsertSizeDistribution.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.pileup.PileupElement; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java index 12c618ce3..1f80701d9 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfNonrefBasesSupportingSNP.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -34,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java index 0544f6f67..30332ce65 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfRefSecondBasesSupportingSNP.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -35,7 +36,6 @@ import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import java.util.Map; import java.util.HashMap; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java index 4a82299bd..5ba088093 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/annotator/ProportionOfSNPSecondBasesSupportingRef.java @@ -25,11 +25,11 @@ package org.broadinstitute.sting.oneoffprojects.walkers.annotator; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.collections.Pair; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/AminoAcidTransition.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/AminoAcidTransition.java index c0c3217f9..effae28c0 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/AminoAcidTransition.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/AminoAcidTransition.java @@ -1,12 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; import org.apache.log4j.Logger; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import org.broadinstitute.sting.playground.utils.report.utils.TableType; @@ -15,8 +14,6 @@ import org.broadinstitute.sting.utils.analysis.AminoAcid; import org.broadinstitute.sting.utils.analysis.AminoAcidTable; import org.broadinstitute.sting.utils.analysis.AminoAcidUtils; -import java.util.ArrayList; - /* * Copyright (c) 2010 The Broad Institute * @@ -201,7 +198,7 @@ public class AminoAcidTransition extends VariantEvaluator { } else if ( alternate == null ) { interesting = "Unknown Alternate Codon"; } else { - acidTable.update(reference,alternate,eval.isTransition()); + acidTable.update(reference,alternate, VariantContextUtils.isTransition(eval)); } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/SimpleMetricsBySample.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/SimpleMetricsBySample.java index 8d070568f..c6ee2e44e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/SimpleMetricsBySample.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/SimpleMetricsBySample.java @@ -1,9 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.SampleDataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluatorBySample; @@ -78,7 +79,7 @@ class TiTvRatioSample extends SampleDataPoint { public void update2(VariantContext vc, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( vc != null && vc.isSNP() ) { - if ( vc.isTransition() ) { + if ( VariantContextUtils.isTransition(vc) ) { nTi++; } else { nTv++; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/VariantEvaluatorBySample.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/VariantEvaluatorBySample.java index 0edf08dd9..8a7bfb18b 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/VariantEvaluatorBySample.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/VariantEvaluatorBySample.java @@ -1,9 +1,9 @@ package org.broadinstitute.sting.gatk.walkers.varianteval; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java index 1ecc313f0..394446305 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BeagleOutputToVCFWalker.java @@ -25,6 +25,9 @@ package org.broadinstitute.sting.playground.gatk.walkers; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -64,7 +67,8 @@ public class BeagleOutputToVCFWalker extends RodWalker { @Argument(fullName="output_file", shortName="output", doc="VCF file to which output should be written", required=true) private String OUTPUT_FILE = null; - @Argument(fullName="nocall_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false) + @Argument(fullName="no" + + "call_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false) private double noCallThreshold = 0.0; protected static String line = null; @@ -284,7 +288,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { } - VariantContext filteredVC = new VariantContext("outputvcf", vc_input.getLocation(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.filtersWereApplied() ? vc_input.getFilters() : null, vc_input.getAttributes()); + VariantContext filteredVC = new VariantContext("outputvcf", vc_input.getChr(), vc_input.getStart(), vc_input.getEnd(), vc_input.getAlleles(), genotypes, vc_input.getNegLog10PError(), vc_input.filtersWereApplied() ? vc_input.getFilters() : null, vc_input.getAttributes()); Set altAlleles = filteredVC.getAlternateAlleles(); StringBuffer altAlleleCountString = new StringBuffer(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java index 52938691c..8fb87ac35 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LocusMismatchWalker.java @@ -25,12 +25,11 @@ package org.broadinstitute.sting.playground.gatk.walkers; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.walkers.By; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.LocusWalker; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java index 1c7ba925f..8277469b4 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java @@ -25,13 +25,14 @@ package org.broadinstitute.sting.playground.gatk.walkers; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.RodWalker; @@ -93,10 +94,10 @@ public class ProduceBeagleInputWalker extends RodWalker { return 0; // output marker ID to Beagle input file - beagleWriter.print(String.format("%s ", vc_eval.getLocation().toString())); + beagleWriter.print(String.format("%s ", VariantContextUtils.getLocation(vc_eval).toString())); if (beagleGenotypesWriter != null) - beagleGenotypesWriter.print(String.format("%s ", vc_eval.getLocation().toString())); + beagleGenotypesWriter.print(String.format("%s ", VariantContextUtils.getLocation(vc_eval).toString())); for (Allele allele: vc_eval.getAlleles()) { // TODO -- check whether this is really needed by Beagle diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java index a74be9639..e5dd974c4 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadBackedPhasingWalker.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010 The Broad Institute + * Copyright (c) 2010, The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -12,25 +12,26 @@ * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. */ package org.broadinstitute.sting.playground.gatk.walkers; import net.sf.samtools.SAMRecord; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; @@ -124,7 +125,7 @@ public class ReadBackedPhasingWalker extends LocusWalker, SNPDe public Counter(Long linearOffset) { this.linearOffset = linearOffset; + //System.out.printf("linear offset %d%n", linearOffset); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/GraphReferenceBuilder.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/GraphReferenceBuilder.java index 7afe6c837..c732fb897 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/GraphReferenceBuilder.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/GraphReferenceBuilder.java @@ -28,9 +28,10 @@ package org.broadinstitute.sting.playground.gatk.walkers.graphalign; import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequenceFileFactory; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.RefWalker; @@ -112,7 +113,7 @@ public class GraphReferenceBuilder extends RefWalker { if ( ! alreadyAddedAtThisLoc ) { // if we have multiple variants at a locus, just take the first damn one we see for now // todo -- getAlternativeBases should be getAlleles() - GenomeLoc loc = vc.getLocation(); + GenomeLoc loc = VariantContextUtils.getLocation(vc); String[] allAllelesList = null; // variant.getAlternateBases().split(""); // todo fixme if ( allAllelesList.length >= 3 ) { // bad dbSNP format :-( List alleles = Arrays.asList(allAllelesList).subList(1,3); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/ReferenceGraph.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/ReferenceGraph.java index 1cf68b332..158ebc01b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/ReferenceGraph.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/ReferenceGraph.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.graphalign; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broad.tribble.util.variantcontext.VariantContext; import org.jgrapht.graph.DefaultEdge; import org.jgrapht.graph.SimpleDirectedGraph; import org.apache.log4j.Logger; @@ -12,7 +12,6 @@ import net.sf.samtools.util.StringUtil; import java.util.*; import java.io.Serializable; import java.io.IOException; -import java.io.ObjectOutputStream; import java.io.ObjectInputStream; /** diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/papergenotyper/GATKPaperGenotyper.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/papergenotyper/GATKPaperGenotyper.java index 119d416b6..87879fd48 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/papergenotyper/GATKPaperGenotyper.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/papergenotyper/GATKPaperGenotyper.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.papergenotyper; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; @@ -36,8 +35,6 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import java.io.PrintStream; - /** * A simple Bayesian genotyper, that outputs a text based call format. Intended to be used only as an diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java index ca9cff78e..76d80c47f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/secondaryBases/SecondaryBaseTransitionTableWalker.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.secondaryBases; +import org.broad.tribble.util.variantcontext.Genotype; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.Reference; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java index faaf204f2..14a3d6c85 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationWalker.java @@ -1,13 +1,10 @@ package org.broadinstitute.sting.playground.gatk.walkers.validation; -import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broad.tribble.vcf.VCFCodec; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java index 2d4e86b58..b40237386 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnalyzeAnnotationsWalker.java @@ -25,9 +25,9 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.commandline.Argument; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java index aeddcc116..8c8ecf599 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantoptimizer/AnnotationDataManager.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.variantoptimizer; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelect.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelect.java index e772e7208..de9d81345 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelect.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelect.java @@ -28,10 +28,10 @@ package org.broadinstitute.sting.playground.gatk.walkers.vcftools; import org.apache.commons.jexl2.Expression; import org.apache.commons.jexl2.JexlContext; import org.apache.commons.jexl2.MapContext; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.walkers.RMD; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSubset.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSubset.java index df901557f..496ecf75d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSubset.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSubset.java @@ -25,10 +25,10 @@ package org.broadinstitute.sting.playground.gatk.walkers.vcftools; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.commandline.Argument; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java index b36abc6fb..30143c92a 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.MutableGenotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.util.*; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 0d0ab7cae..44a20d86b 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -1,6 +1,6 @@ package org.broadinstitute.sting.utils.genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broad.tribble.util.variantcontext.VariantContext; /* diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 779d6bfa5..c2bdf3e69 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -4,12 +4,13 @@ import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMSequenceRecord; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.gatk.contexts.variantcontext.*; import java.io.File; @@ -150,8 +151,8 @@ public class GeliAdapter implements GeliGenotypeWriter { } LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG); - addCall(GenomeLocParser.getContigInfo(vc.getLocation().getContig()), - (int)vc.getLocation().getStart(), + addCall(GenomeLocParser.getContigInfo(vc.getChr()), + vc.getStart(), ref, maxMappingQual, readCount, diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index 5338a0ae3..b496bd071 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -2,8 +2,9 @@ package org.broadinstitute.sting.utils.genotype.geli; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; @@ -77,7 +78,7 @@ public class GeliTextWriter implements GeliGenotypeWriter { if ( vc.getNSamples() != 1 ) throw new IllegalArgumentException("The Geli format does not support multi-sample or no-calls"); - org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype genotype = vc.getGenotypes().values().iterator().next(); + Genotype genotype = vc.getGenotypes().values().iterator().next(); if ( genotype.isNoCall() ) throw new IllegalArgumentException("The Geli format does not support no-calls"); @@ -128,8 +129,8 @@ public class GeliTextWriter implements GeliGenotypeWriter { sb.append(base); mWriter.println(String.format("%s %16d %c %8d %.0f %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", - vc.getLocation().getContig(), - vc.getLocation().getStart(), + vc.getChr(), + vc.getStart(), ref, readCount, maxMappingQual, diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 17b2b71bb..44d309921 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -3,9 +3,10 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BlockCompressedOutputStream; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.MutableGenotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFConstants; -import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotype.CalledGenotype; @@ -159,7 +160,7 @@ public class GLFWriter implements GLFGenotypeWriter { if ( vc.getNSamples() != 1 ) throw new IllegalArgumentException("The GLF format does not support multi-sample or no-calls"); - org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype genotype = vc.getGenotypes().values().iterator().next(); + Genotype genotype = vc.getGenotypes().values().iterator().next(); if ( genotype.isNoCall() ) throw new IllegalArgumentException("The GLF format does not support no-calls"); @@ -194,7 +195,7 @@ public class GLFWriter implements GLFGenotypeWriter { if (genotype.hasAttribute(VCFConstants.DEPTH_KEY) && 0 == readCount) readCount = (Integer)((MutableGenotype)genotype).getAttribute(VCFConstants.DEPTH_KEY); - addCall(GenomeLocParser.getContigInfo(vc.getLocation().getContig()), (int)vc.getLocation().getStart(), (float) rms, ref, readCount, obj); + addCall(GenomeLocParser.getContigInfo(vc.getChr()), vc.getStart(), (float) rms, ref, readCount, obj); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 3dd6feffb..8bab9b155 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; import org.broadinstitute.sting.utils.StingException; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java deleted file mode 100644 index f4be429c9..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java +++ /dev/null @@ -1,101 +0,0 @@ -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broad.tribble.vcf.VCFGenotypeEncoding; -import org.broad.tribble.vcf.VCFGenotypeRecord; -import org.broad.tribble.vcf.VCFConstants; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.Utils; - -import java.util.List; -import java.util.ArrayList; - - -/** - * a helper class, which performs a lot of the safety checks on the parameters - * we feed to the VCF (like ensuring the same position for each genotype in a call). - */ -class VCFParameters { - private String referenceBases = "0"; - private int position = 0; - private String contig = null; - private boolean initialized = false; - private List genotypeRecords = new ArrayList(); - private List formatList = new ArrayList(); - private List alternateBases = new ArrayList(); - private List alleleCounts = new ArrayList(); - - public void setLocations(GenomeLoc location, String refBases) { - // if we haven't set it up, we initialize the object - if (!initialized) { - initialized = true; - this.contig = location.getContig(); - this.position = (int) location.getStart(); - if (location.getStart() != location.getStop()) { - throw new IllegalArgumentException("The start and stop locations must be the same"); - } - this.referenceBases = refBases; - } else { - if (!contig.equals(this.contig)) - throw new IllegalArgumentException("The contig name has to be the same at a single locus"); - if (location.getStart() != this.position) - throw new IllegalArgumentException("The position has to be the same at a single locus"); - if (refBases != this.referenceBases) - throw new IllegalArgumentException("The reference has to be the same at a single locus"); - } - } - - /** @return get the position */ - public int getPosition() { - return position; - } - - /** @return get the contig name */ - public String getContig() { - return contig; - } - - /** @return get the reference base */ - public String getReferenceBases() { - return referenceBases; - } - - public void addGenotypeRecord(VCFGenotypeRecord record) { - genotypeRecords.add(record); - for ( VCFGenotypeEncoding allele : record.getAlleles() ) { - int index = alternateBases.indexOf(allele); - if ( index != -1 ) // we don't keep track of ref alleles here - alleleCounts.set(index, alleleCounts.get(index)+1); - } - } - - public void addFormatItem(String item) { - if (!formatList.contains(item)) - formatList.add(item); - } - - public void addAlternateBase(VCFGenotypeEncoding base) { - if ( !alternateBases.contains(base) && - !base.toString().equals(String.valueOf(getReferenceBases()).toUpperCase()) && - !base.toString().equals(VCFConstants.EMPTY_ALLELE) ) { - alternateBases.add(base); - alleleCounts.add(0); - } - } - - public List getAlternateBases() { - return alternateBases; - } - - // the list of allele counts where each entry relates to the corresponding entry in the Alternate Base list - public List getAlleleCounts() { - return alleleCounts; - } - - public String getFormatString() { - return Utils.join(VCFConstants.FORMAT_FIELD_SEPARATOR, formatList); - } - - public List getGenotypeRecords() { - return genotypeRecords; - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index c125451f9..2873e795a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -8,9 +8,9 @@ import java.util.Iterator; import org.broad.tribble.FeatureSource; import org.broad.tribble.index.Index; import org.broad.tribble.source.BasicFeatureSource; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.StingException; /** The VCFReader class, which given a valid vcf file, parses out the header and VariantContexts */ diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index 2aebc9e0c..7ab7503df 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -25,11 +25,14 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.Utils; import org.apache.log4j.Logger; import java.util.*; @@ -101,8 +104,11 @@ public class VCFUtils { return fields; } + + public static Set smartMergeHeaders(Collection headers, Logger logger) throws IllegalStateException { HashMap map = new HashMap(); // from KEY.NAME -> line + HashSet lines = new HashSet(); // todo -- needs to remove all version headers from sources and add its own VCF version line for ( VCFHeader source : headers ) { @@ -110,7 +116,7 @@ public class VCFUtils { for ( VCFHeaderLine line : source.getMetaData()) { String key = line.getKey(); - if ( line instanceof VCFNamedHeaderLine ) + if ( line instanceof VCFNamedHeaderLine) key = key + "." + ((VCFNamedHeaderLine) line).getName(); if ( map.containsKey(key) ) { @@ -119,9 +125,8 @@ public class VCFUtils { continue; else if ( ! line.getClass().equals(other.getClass()) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); - else if ( line instanceof VCFFilterHeaderLine ) { - String lineName = ((VCFFilterHeaderLine) line).getName(); - String otherName = ((VCFFilterHeaderLine) other).getName(); + else if ( line instanceof VCFFilterHeaderLine) { + String lineName = ((VCFFilterHeaderLine) line).getName(); String otherName = ((VCFFilterHeaderLine) other).getName(); if ( ! lineName.equals(otherName) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); } else if ( line instanceof VCFCompoundHeaderLine ) { @@ -157,4 +162,18 @@ public class VCFUtils { return new HashSet(map.values()); } + + /** + * return a set of supported format lines; what we currently support for output in the genotype fields of a VCF + * @return a set of VCF format lines + */ + public static Set getSupportedHeaderStrings() { + Set result = new HashSet(); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); + result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic")); + return result; + } + } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index d8a77c6b0..61c540e41 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -1,10 +1,10 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; @@ -121,7 +121,7 @@ public class VCFWriter { vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc, refBase); - GenomeLoc loc = vc.getLocation(); + GenomeLoc loc = VariantContextUtils.getLocation(vc); Map alleleMap = new HashMap(vc.getAlleles().size()); alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup diff --git a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/AlleleUnitTest.java b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/AlleleUnitTest.java index d7bc8a2ee..1a35201e5 100755 --- a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/AlleleUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/AlleleUnitTest.java @@ -29,6 +29,7 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext; // the imports for unit testing. +import org.broad.tribble.util.variantcontext.Allele; import org.broadinstitute.sting.BaseTest; import org.junit.Assert; import org.junit.Before; diff --git a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUnitTest.java b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUnitTest.java index e789eeed8..0a97df622 100755 --- a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUnitTest.java @@ -4,6 +4,10 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext; // the imports for unit testing. +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.MutableVariantContext; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -67,10 +71,10 @@ public class VariantContextUnitTest extends BaseTest { logger.warn("testCreatingSNPVariantContext"); List alleles = Arrays.asList(Aref, T); - VariantContext vc = new VariantContext("test", snpLoc, alleles); + VariantContext vc = new VariantContext("test", snpLoc.getContig(),snpLoc.getStart(), snpLoc.getStop(), alleles); logger.warn("vc = " + vc); - Assert.assertEquals(vc.getLocation(), snpLoc); + Assert.assertEquals(VariantContextUtils.getLocation(vc), snpLoc); Assert.assertEquals(vc.getType(), VariantContext.Type.SNP); Assert.assertTrue(vc.isSNP()); Assert.assertFalse(vc.isIndel()); @@ -80,8 +84,8 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertTrue(vc.isBiallelic()); Assert.assertEquals(vc.getNAlleles(), 2); - Assert.assertTrue(vc.isTransversion()); - Assert.assertFalse(vc.isTransition()); + Assert.assertTrue(VariantContextUtils.isTransversion(vc)); + Assert.assertFalse(VariantContextUtils.isTransition(vc)); Assert.assertEquals(vc.getReference(), Aref); Assert.assertEquals(vc.getAlleles().size(), 2); @@ -98,10 +102,10 @@ public class VariantContextUnitTest extends BaseTest { logger.warn("testCreatingRefVariantContext"); List alleles = Arrays.asList(Aref); - VariantContext vc = new VariantContext("test", snpLoc, alleles); + VariantContext vc = new VariantContext("test", snpLoc.getContig(),snpLoc.getStart(), snpLoc.getStop(), alleles); logger.warn("vc = " + vc); - Assert.assertEquals(snpLoc, vc.getLocation()); + Assert.assertEquals(snpLoc, VariantContextUtils.getLocation(vc)); Assert.assertEquals(VariantContext.Type.NO_VARIATION, vc.getType()); Assert.assertFalse(vc.isSNP()); Assert.assertFalse(vc.isIndel()); @@ -125,10 +129,10 @@ public class VariantContextUnitTest extends BaseTest { logger.warn("testCreatingDeletionVariantContext"); List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContext("test", delLoc, alleles); + VariantContext vc = new VariantContext("test", delLoc.getContig(), delLoc.getStart(), delLoc.getStop(), alleles); logger.warn("vc = " + vc); - Assert.assertEquals(vc.getLocation(), delLoc); + Assert.assertEquals(VariantContextUtils.getLocation(vc), delLoc); Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); Assert.assertFalse(vc.isSNP()); Assert.assertTrue(vc.isIndel()); @@ -153,10 +157,10 @@ public class VariantContextUnitTest extends BaseTest { logger.warn("testCreatingInsertionVariantContext"); List alleles = Arrays.asList(delRef, ATC); - VariantContext vc = new VariantContext("test", insLoc, alleles); + VariantContext vc = new VariantContext("test", insLoc.getContig(), insLoc.getStart(), insLoc.getStop(), alleles); logger.warn("vc = " + vc); - Assert.assertEquals(vc.getLocation(), insLoc); + Assert.assertEquals(VariantContextUtils.getLocation(vc), insLoc); Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); Assert.assertFalse(vc.isSNP()); Assert.assertTrue(vc.isIndel()); @@ -179,45 +183,45 @@ public class VariantContextUnitTest extends BaseTest { @Test (expected = IllegalArgumentException.class) public void testBadConstructorArgs1() { logger.warn("testBadConstructorArgs1"); - new VariantContext("test", insLoc, Arrays.asList(delRef, ATCref)); + new VariantContext("test", insLoc.getContig(), insLoc.getStart(), insLoc.getStop(), Arrays.asList(delRef, ATCref)); } @Test (expected = IllegalArgumentException.class) public void testBadConstructorArgs2() { logger.warn("testBadConstructorArgs2"); - new VariantContext("test", insLoc, Arrays.asList(delRef, del)); + new VariantContext("test", insLoc.getContig(), insLoc.getStart(), insLoc.getStop(), Arrays.asList(delRef, del)); } @Test (expected = IllegalArgumentException.class) public void testBadConstructorArgs3() { logger.warn("testBadConstructorArgs3"); - new VariantContext("test", insLoc, Arrays.asList(del)); + new VariantContext("test", insLoc.getContig(), insLoc.getStart(), insLoc.getStop(), Arrays.asList(del)); } @Test (expected = IllegalArgumentException.class) public void testBadConstructorArgsDuplicateAlleles1() { logger.warn("testBadConstructorArgsDuplicateAlleles1"); - new VariantContext("test", insLoc, Arrays.asList(Aref, T, T)); + new VariantContext("test", insLoc.getContig(), insLoc.getStart(), insLoc.getStop(), Arrays.asList(Aref, T, T)); } @Test (expected = IllegalArgumentException.class) public void testBadConstructorArgsDuplicateAlleles2() { logger.warn("testBadConstructorArgsDuplicateAlleles2"); - new VariantContext("test", insLoc, Arrays.asList(Aref, A)); + new VariantContext("test", insLoc.getContig(), insLoc.getStart(), insLoc.getStop(), Arrays.asList(Aref, A)); } @Test (expected = IllegalStateException.class) public void testBadLoc1() { logger.warn("testBadLoc1"); List alleles = Arrays.asList(Aref, T, del); - VariantContext vc = new VariantContext("test", delLoc, alleles); + VariantContext vc = new VariantContext("test", delLoc.getContig(), delLoc.getStart(), delLoc.getStop(), alleles); } @Test (expected = IllegalStateException.class) public void testBadTiTvRequest() { logger.warn("testBadConstructorArgsDuplicateAlleles2"); - new VariantContext("test", insLoc, Arrays.asList(Aref, ATC)).isTransition(); + VariantContextUtils.isTransition(new VariantContext("test", insLoc.getContig(), insLoc.getStart(), insLoc.getStop(), Arrays.asList(Aref, ATC))); } @Test @@ -230,7 +234,7 @@ public class VariantContextUnitTest extends BaseTest { Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T), 10); Genotype g3 = new Genotype("TT", Arrays.asList(T, T), 10); - VariantContext vc = new VariantContext("test", snpLoc, alleles, Arrays.asList(g1, g2, g3)); + VariantContext vc = new VariantContext("test", snpLoc.getContig(),snpLoc.getStart(), snpLoc.getStop(), alleles, Arrays.asList(g1, g2, g3)); logger.warn("vc = " + vc); Assert.assertTrue(vc.hasGenotypes()); @@ -272,7 +276,7 @@ public class VariantContextUnitTest extends BaseTest { Genotype g5 = new Genotype("dd", Arrays.asList(del, del), 10); Genotype g6 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 10); - VariantContext vc = new VariantContext("test", snpLoc, alleles, Arrays.asList(g1, g2, g3, g4, g5, g6)); + VariantContext vc = new VariantContext("test", snpLoc.getContig(),snpLoc.getStart(), snpLoc.getStop(), alleles, Arrays.asList(g1, g2, g3, g4, g5, g6)); logger.warn("vc = " + vc); Assert.assertTrue(vc.hasGenotypes()); @@ -301,7 +305,7 @@ public class VariantContextUnitTest extends BaseTest { Genotype g1 = new Genotype("AA1", Arrays.asList(Aref, Aref), 10); Genotype g2 = new Genotype("AA2", Arrays.asList(Aref, Aref), 10); Genotype g3 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 10); - VariantContext vc = new VariantContext("test", snpLoc, alleles, Arrays.asList(g1, g2, g3)); + VariantContext vc = new VariantContext("test", snpLoc.getContig(),snpLoc.getStart(), snpLoc.getStop(), alleles, Arrays.asList(g1, g2, g3)); logger.warn("vc = " + vc); Assert.assertTrue(vc.hasGenotypes()); @@ -323,7 +327,7 @@ public class VariantContextUnitTest extends BaseTest { List alleles = Arrays.asList(Aref, T, del); Genotype g1 = new Genotype("AA", Arrays.asList(Aref, Aref), 10); Genotype g2 = new Genotype("AT", Arrays.asList(Aref, T), 10); - MutableVariantContext vc = new MutableVariantContext("test", snpLoc, alleles, Arrays.asList(g1,g2)); + MutableVariantContext vc = new MutableVariantContext("test", snpLoc.getContig(),snpLoc.getStart(), snpLoc.getStop(), alleles, Arrays.asList(g1,g2)); logger.warn("vc = " + vc); Assert.assertTrue(vc.isNotFiltered()); @@ -359,7 +363,7 @@ public class VariantContextUnitTest extends BaseTest { Genotype g3 = new Genotype("TT", Arrays.asList(T, T), 10); Genotype g4 = new Genotype("..", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), 10); Genotype g5 = new Genotype("--", Arrays.asList(del, del), 10); - VariantContext vc = new VariantContext("test", snpLoc, alleles, Arrays.asList(g1,g2,g3,g4,g5)); + VariantContext vc = new VariantContext("test", snpLoc.getContig(),snpLoc.getStart(), snpLoc.getStop() , alleles, Arrays.asList(g1,g2,g3,g4,g5)); logger.warn("vc = " + vc); VariantContext vc12 = vc.subContextFromGenotypes(Arrays.asList(g1,g2)); diff --git a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java index a67426769..7372f5eae 100644 --- a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java @@ -24,6 +24,8 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext; import net.sf.samtools.SAMFileHeader; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -139,7 +141,7 @@ public class VariantJEXLContextUnitTest extends BaseTest { private JEXLMap getVarContext() { List alleles = Arrays.asList(Aref, T); - VariantContext vc = new VariantContext("test", snpLoc, alleles); + VariantContext vc = new VariantContext("test", snpLoc.getContig(), snpLoc.getStart(), snpLoc.getStop(), alleles); return new JEXLMap(Arrays.asList(exp),vc); } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java index 59fd7bd7c..565ce5333 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java @@ -7,8 +7,8 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broad.tribble.gelitext.GeliTextCodec; import org.broad.tribble.gelitext.GeliTextFeature; import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java deleted file mode 100644 index 090ae961a..000000000 --- a/java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java +++ /dev/null @@ -1,492 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata.features.vcf4; - -import org.broad.tribble.readers.AsciiLineReader; -import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import net.sf.picard.reference.IndexedFastaSequenceFile; - -/** - * test out pieces of the VCF 4 codec. - */ -public class VCF4UnitTest extends BaseTest { - File vcfGenotypeFile = new File("testdata/vcf/vcfWithGenotypes.vcf"); - File vcfNoGenotypeFile = new File("testdata/vcf/vcfWithoutGenotypes.vcf"); - - // setup the contig ordering - @BeforeClass - public static void setupContig() { - IndexedFastaSequenceFile seq; - seq = new IndexedFastaSequenceFile(new File(b36KGReference)); - GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary()); - } - - @Test - public void testReadBasicHeader() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - - int seenRecords = 0; - - // check some field entries of each type - Set lines = testSetup.getHeader().getMetaData(); - - for (VCFHeaderLine line : lines) { - // check the vcf info header lines - if (line instanceof VCFInfoHeaderLine) { - VCFInfoHeaderLine ihLIne = (VCFInfoHeaderLine)line; - - // test a normal info line - if (ihLIne.getName().equals("NS")) { - Assert.assertEquals(VCFHeaderLineType.Integer,ihLIne.getType()); - Assert.assertEquals(1,ihLIne.getCount()); - Assert.assertTrue("Number of Samples With Data".equals(ihLIne.getDescription())); - seenRecords++; - } - // test a info line that uses the period to represent an unbounded value - if (ihLIne.getName().equals("AF")) { - Assert.assertEquals(VCFHeaderLineType.Float,ihLIne.getType()); - Assert.assertEquals(VCFInfoHeaderLine.UNBOUNDED,ihLIne.getCount()); - Assert.assertTrue("Allele Frequency".equals(ihLIne.getDescription())); - seenRecords++; - } - } - // check the vcf filter header lines - if (line instanceof VCFFilterHeaderLine) { - VCFFilterHeaderLine fhLIne = (VCFFilterHeaderLine)line; - if (fhLIne.getName().equals("q10")) { - Assert.assertTrue("Quality below 10".equals(fhLIne.getDescription())); - seenRecords++; - } - } - - // check the vcf info header lines - if (line instanceof VCFFormatHeaderLine) { - VCFFormatHeaderLine ifLIne = (VCFFormatHeaderLine)line; - if (ifLIne.getName().equals("GT")) { - Assert.assertEquals(VCFHeaderLineType.String,ifLIne.getType()); - Assert.assertEquals(1,ifLIne.getCount()); - Assert.assertTrue("Genotype".equals(ifLIne.getDescription())); - seenRecords++; - } - } - } - - Assert.assertEquals("We expected to see three records (one of each type we check), but didn't.",4,seenRecords); - } - - @Test - public void testOutputHeader() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - - File tempFile = null; - try { - tempFile = File.createTempFile("VCF4Test","vcf"); - tempFile.deleteOnExit(); - } catch (IOException e) { - Assert.fail("Couldn't create a temporary file "); - } - // write it to disk - VCFWriter writer = new VCFWriter(tempFile); - writer.writeHeader(testSetup.getHeader()); - writer.close(); - - // md5 sum the file - // TODO -- uncomment this when we have a better solution than using md5s in a unit test - //Assert.assertTrue("expecting md5sum of e376c7cb1831d3cbdca670f360b7f022, but got " + md5SumFile(tempFile),"e376c7cb1831d3cbdca670f360b7f022".equals(md5SumFile(tempFile))); - } - - @Test - public void testCountVCF4Records() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - AsciiLineReader reader = testSetup.getReader(); - - // now parse the lines - String line = null; - try { - line = reader.readLine(); - } catch (IOException e) { - Assert.fail("Failed to read a line"); - } - - // our record count - int recordCount = 0; - while (line != null) { - try { - //System.err.println(codec.decode(line).toString()); - recordCount++; - testSetup.codec.decode(line); - line = reader.readLine(); - } catch (IOException e) { - Assert.fail("Failed to read a line"); - } - } - Assert.assertEquals(7,recordCount); - } - - @Test - public void testCountVCF4RecordsWithoutGenotypes() { - TestSetup testSetup = new TestSetup().invoke(vcfNoGenotypeFile); - AsciiLineReader reader = testSetup.getReader(); - - // now parse the lines - String line = null; - try { - line = reader.readLine(); - } catch (IOException e) { - Assert.fail("Failed to read a line"); - } - - // our record count - int recordCount = 0; - while (line != null) { - try { - recordCount++; - testSetup.codec.decode(line); - line = reader.readLine(); - } catch (IOException e) { - Assert.fail("Failed to read a line"); - } - } - Assert.assertEquals(6,recordCount); - } - - - // test too many info fields - NOT a valid test with validation turned off in the VCF4 reader - String twoManyInfoLine = "20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2;HH\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:0,0"; - //@Test(expected=StingException.class) - public void testCheckTooManyInfoFields() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - testSetup.codec.decode(twoManyInfoLine); - } - // test a regular line - String regularLine = "20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:0,0"; - @Test - public void testCheckInfoValidation() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - testSetup.codec.decode(regularLine); - } - // test too few info lines, we don't provide the DP in this line - // test GT field in the incorrect position (!= 0) - String GTFieldInTheWrongPosition = "20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;AF=0.5;DB;H2\tGQ:DP:HQ:GT\t48:1:51,51:0|0\t48:8:51,51:0|0\t43:5:0,0:0|0"; - @Test(expected=RuntimeException.class) - public void testCheckGTFieldOrdering() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - testSetup.codec.decode(GTFieldInTheWrongPosition); - } - - // test too few info lines, we don't provide the DP in this line - String twoFewInfoLine = "20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;AF=0.5;DB\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t0|0:48:1:51,51\t0|0:48:1:51,51"; - @Test - public void testCheckTwoFewInfoValidation() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - testSetup.codec.decode(twoFewInfoLine); - } - - - - // test that we're getting the right genotype for a multi-base polymorphism - String MNPLine = "20\t14370\trs6054257\tGG\tAT\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,."; - @Test - public void testMNPValidation() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - VariantContext vc = (VariantContext)testSetup.codec.decode(MNPLine); - Map genotypes = vc.getGenotypes(); - Assert.assertTrue(genotypes.containsKey("NA00003")); - Genotype g = genotypes.get("NA00003"); - Assert.assertTrue("Expected AT genotype, saw = " + g.getAllele(0),"AT".equals(g.getAllele(0).toString())); - Assert.assertTrue(vc.getType()== VariantContext.Type.MNP); - } - - // test that we're getting the right genotype for what appears to be a multi-base polymorphism, but is really just a SNP - String MNPLine2 = "20\t14370\trs6054257\tGT\tAT\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,."; - @Test - public void testMNPWannabeButReallyASNPValidation() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - VariantContext vc = (VariantContext)testSetup.codec.decode(MNPLine2); - Map genotypes = vc.getGenotypes(); - Assert.assertTrue(genotypes.containsKey("NA00003")); - Genotype g = genotypes.get("NA00003"); - Assert.assertTrue("Expected A genotype, saw = " + g.getAllele(0),"A".equals(g.getAllele(0).toString())); - Assert.assertTrue(vc.getType()== VariantContext.Type.SNP); - } - - File largeVCF = new File("yri.vcf"); // change to whatever file you'd like to test in the following test - - // @Test uncomment to re-enable testing - public void checkLargeVCF() { - TestSetup testSetup = new TestSetup().invoke(largeVCF); - AsciiLineReader reader = testSetup.getReader(); - - // now parse the lines - String line = null; - try { - line = reader.readLine(); - } catch (IOException e) { - Assert.fail("Failed to read a line"); - } - - // our record count - int recordCount = 0; - int badRecordCount = 0; - long milliseconds = System.currentTimeMillis(); - while (line != null) { - try { - recordCount++; - try { - testSetup.codec.decode(line); - } catch (Exception e) { - //System.err.println(e.getMessage() + " -> " + line); - //System.err.println(line); - Assert.fail("Bad record from line " + line + " message = " + e.getMessage()); - badRecordCount++; - } - line = reader.readLine(); - if (recordCount % 1000 == 0) - System.err.println("record count == " + recordCount); - } catch (IOException e) { - Assert.fail("Failed to read a line"); - } - } - System.err.println("Total time = " + (System.currentTimeMillis() - milliseconds)); - Assert.assertEquals(0,badRecordCount); - Assert.assertEquals(728075,recordCount); - } - - //@Test - public void checkBobsCNVVCF() { - TestSetup testSetup = new TestSetup().invoke(new File("bobs.vcf")); - AsciiLineReader reader = testSetup.getReader(); - VCF4Codec codec = testSetup.getCodec(); - - // now parse the lines - String line = null; - try { - line = reader.readLine(); - } catch (IOException e) { - Assert.fail("Failed to read a line"); - } - - // our record count - int recordCount = 0; - int badRecordCount = 0; - long milliseconds = System.currentTimeMillis(); - while (line != null) { - try { - recordCount++; - try { - testSetup.codec.decode(line); - } catch (Exception e) { - Assert.fail("Bad record from line " + line + " message = " + e.getMessage()); - badRecordCount++; - } - line = reader.readLine(); - if (recordCount % 1000 == 0) - System.err.println("record count == " + recordCount); - } catch (IOException e) { - Assert.fail("Failed to read a line"); - } - } - System.err.println("Total time = " + (System.currentTimeMillis() - milliseconds)); - Assert.assertEquals(0,badRecordCount); - Assert.assertEquals(15947,recordCount); - } - - /** - * test out the clipping of alleles (removing extra context provided by VCF implementation). - */ - @Test - public void testClippingOfAllelesDeletionAndInsertion() { - String ref = "GGTT"; - ArrayList alleles = new ArrayList(); - alleles.add(Allele.create(ref,true)); - alleles.add(Allele.create("GGAATT",false)); - alleles.add(Allele.create("GT",false)); - - Pair> locAndList = VCF4Codec.clipAlleles("1",1,ref,alleles); - Assert.assertTrue(locAndList.first.equals(GenomeLocParser.createGenomeLoc("1",1,3))); - - // we know the ordering - //System.err.println(locAndList.second.get(0).toString()); - //System.err.println(locAndList.second.get(1).toString()); - //System.err.println(locAndList.second.get(2).toString()); - Assert.assertTrue(locAndList.second.get(0).toString().equals("GT*")); - Assert.assertTrue(locAndList.second.get(0).isReference()); - Assert.assertTrue(locAndList.second.get(1).toString().equals("GAAT")); - Assert.assertTrue(locAndList.second.get(2).toString().equals("-")); - } - - /** - * test out the clipping of alleles (removing extra context provided by VCF implementation). - */ - @Test - public void testClippingManyPotentialFrontClippedBases() { - String ref = "GGGGTT"; - ArrayList alleles = new ArrayList(); - alleles.add(Allele.create(ref,true)); - alleles.add(Allele.create("GGGGAATT",false)); - alleles.add(Allele.create("GGGT",false)); - - Pair> locAndList = VCF4Codec.clipAlleles("1",1,ref,alleles); - Assert.assertTrue(locAndList.first.equals(GenomeLocParser.createGenomeLoc("1",1,5))); - - // we know the ordering - //System.err.println(locAndList.second.get(0).toString()); - //System.err.println(locAndList.second.get(1).toString()); - //System.err.println(locAndList.second.get(2).toString()); - Assert.assertTrue(locAndList.second.get(0).toString().equals("GGGT*")); - Assert.assertTrue(locAndList.second.get(0).isReference()); - Assert.assertTrue(locAndList.second.get(1).toString().equals("GGGAAT")); - Assert.assertTrue(locAndList.second.get(2).toString().equals("GG")); - } - - /** - * test out the clipping of alleles (removing extra context provided by VCF implementation). - */ - @Test - public void testClippingOfAllelesLongRefRepeat() { - String ref = "GGGG"; - ArrayList alleles = new ArrayList(); - alleles.add(Allele.create(ref,true)); - alleles.add(Allele.create("G",false)); - alleles.add(Allele.create("GG",false)); - - Pair> locAndList = VCF4Codec.clipAlleles("1",1,ref,alleles); - Assert.assertTrue(locAndList.first.equals(GenomeLocParser.createGenomeLoc("1",1,4))); - - // we know the ordering - Assert.assertTrue(locAndList.second.get(0).toString().equals("GGG*")); - Assert.assertTrue(locAndList.second.get(0).isReference()); - Assert.assertTrue(locAndList.second.get(1).toString().equals("-")); - Assert.assertTrue(locAndList.second.get(2).toString().equals("G")); - } - - /** - * test out the clipping of alleles (removing extra context provided by VCF implementation). - * TODO - this is kind of a tricky test... we don't know which way clipped the reads, but the position should be accurate - */ - @Test - public void testClippingOfAllelesLongRefRepeatClippable() { - String ref = "GGGGG"; - ArrayList alleles = new ArrayList(); - alleles.add(Allele.create(ref,true)); - alleles.add(Allele.create("GG",false)); - alleles.add(Allele.create("GGG",false)); - - Pair> locAndList = VCF4Codec.clipAlleles("1",1,ref,alleles); - Assert.assertTrue(locAndList.first.equals(GenomeLocParser.createGenomeLoc("1",1,4))); - - // we know the ordering - Assert.assertTrue(locAndList.second.get(0).toString().equals("GGG*")); - Assert.assertTrue(locAndList.second.get(0).isReference()); - Assert.assertTrue(locAndList.second.get(1).toString().equals("-")); - Assert.assertTrue(locAndList.second.get(2).toString().equals("G")); - } - - /** - * test out the clipping of alleles (removing extra context provided by VCF implementation). - */ - @Test - public void testClippingOfAllelesPlainPolyMorph() { - String ref = "C"; - ArrayList alleles = new ArrayList(); - alleles.add(Allele.create(ref,true)); - alleles.add(Allele.create("T",false)); - alleles.add(Allele.create("G",false)); - - Pair> locAndList = VCF4Codec.clipAlleles("1",1,ref,alleles); - Assert.assertTrue(locAndList.first.equals(GenomeLocParser.createGenomeLoc("1",1,1))); - - // we know the ordering - Assert.assertTrue(locAndList.second.get(0).toString().equals("C*")); - Assert.assertTrue(locAndList.second.get(0).isReference()); - Assert.assertTrue(locAndList.second.get(1).toString().equals("T")); - Assert.assertTrue(locAndList.second.get(2).toString().equals("G")); - } - - /** - * test out the clipping of alleles (removing extra context provided by VCF implementation). - */ - @Test - public void testClippingOfAllelesInsertions() { - String ref = "C"; - ArrayList alleles = new ArrayList(); - alleles.add(Allele.create(ref,true)); - alleles.add(Allele.create("CTTTTT",false)); - alleles.add(Allele.create("GGGGGG",false)); - - Pair> locAndList = VCF4Codec.clipAlleles("1",1,ref,alleles); - Assert.assertTrue(locAndList.first.equals(GenomeLocParser.createGenomeLoc("1",1,1))); - - // we know the ordering - Assert.assertTrue(locAndList.second.get(0).toString().equals("C*")); - Assert.assertTrue(locAndList.second.get(0).isReference()); - Assert.assertTrue(locAndList.second.get(1).toString().equals("CTTTTT")); - Assert.assertTrue(locAndList.second.get(2).toString().equals("GGGGGG")); - } - - @Test - public void testGenotypeConversionPhasing() { - String[] parts = {"GT:GD:DP", "0|0", "0|1", "1\\1"}; - List alleles = new ArrayList(); - alleles.add(Allele.create("A", true)); - alleles.add(Allele.create("G", false)); - Pair> locAndAlleles = new Pair>(GenomeLocParser.createGenomeLoc("1",1),alleles); - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); - Map genotypes = testSetup.getCodec().createGenotypeMap(parts, locAndAlleles,0); - // assert the first genotype is phased, and the third is not - Assert.assertTrue(genotypes.get("NA00001").genotypesArePhased()); - Assert.assertTrue(!genotypes.get("NA00003").genotypesArePhased()); - } - - /** - * a test setup for the VCF 4 codec - */ - private class TestSetup { - private AsciiLineReader reader; - private VCF4Codec codec; - private VCFHeader header; - - public AsciiLineReader getReader() { - return reader; - } - - public VCF4Codec getCodec() { - return codec; - } - - public VCFHeader getHeader() { - return header; - } - - public TestSetup invoke(File vcfFile) { - reader = null; - try { - reader = new AsciiLineReader(new FileInputStream(vcfFile)); - } catch (FileNotFoundException e) { - Assert.fail("Unable to parse out VCF file " + vcfFile); - } - codec = new VCF4Codec(); - header = (VCFHeader)codec.readHeader(reader); - return this; - } - } -} diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/IndexPerformanceTests.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/IndexPerformanceTests.java index 25fd04b60..05e05ec0a 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/IndexPerformanceTests.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/IndexPerformanceTests.java @@ -9,20 +9,18 @@ import org.broad.tribble.index.Index; import org.broad.tribble.index.linear.LinearIndex; import org.broad.tribble.iterators.CloseableTribbleIterator; import org.broad.tribble.source.BasicFeatureSource; -import org.broad.tribble.vcf.VCF3Codec; +import org.broad.tribble.util.LittleEndianOutputStream; import org.broad.tribble.vcf.VCFCodec; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableCodec; +import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.collections.Pair; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; +import java.io.*; import java.util.*; /** @@ -41,6 +39,12 @@ public class IndexPerformanceTests extends BaseTest { // the input types Map inputTypes = new HashMap(); + // where the vcf files are located + String fileLocation = validationDataLocation + "Index_Performance_Data/"; + + // bin sizes to try + int[] binSizes = {10, 100, 1000, 5000, 10000, 50000}; + PrintWriter writer; PrintWriter writer2; /** setup the files we're going to run with, including their names */ @@ -52,27 +56,18 @@ public class IndexPerformanceTests extends BaseTest { GenomeLocParser.setupRefContigOrdering(seq); // the input files - inputFiles.put("\"10\"",new File("tip10.vcf")); - inputFiles.put("\"100\"",new File("tip100.vcf")); - inputFiles.put("\"1,000\"",new File("tip1000.vcf")); - inputFiles.put("\"10,000\"",new File("tip10000.vcf")); - inputFiles.put("\"100,000\"",new File("tip100000.vcf")); - inputFiles.put("\"1,000,000\"",new File("tip1000000.vcf")); + inputFiles.put("\"10\"",new File(fileLocation + "tip10.vcf")); + inputFiles.put("\"100\"",new File(fileLocation + "tip100.vcf")); + inputFiles.put("\"1,000\"",new File(fileLocation + "tip1000.vcf")); + inputFiles.put("\"10,000\"",new File(fileLocation + "tip10000.vcf")); + inputFiles.put("\"100,000\"",new File(fileLocation + "tip100000.vcf")); + inputFiles.put("\"1,000,000\"",new File(fileLocation + "tip1000000.vcf")); for (String name : inputFiles.keySet()) { inputTypes.put(name,VCFCodec.class); } inputFiles.put("Big Table",new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/slowAnnotator/big.table.txt")); inputTypes.put("Big Table", AnnotatorInputTableCodec.class); - /*inputFiles.put("100", new File("1000.vcf")); - inputFiles.put("Medium (100K) VCF",new File("100K.vcf")); - inputFiles.put("Big Table",new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/slowAnnotator/big.table.txt")); - inputFiles.put("Huge (1M) VCF",new File("1M.vcf")); - // the input types - inputTypes.put("Huge (1M) VCF", VCFCodec.class); - inputTypes.put("Medium (100K) VCF", VCFCodec.class); - inputTypes.put("1000 records VCF", VCFCodec.class); - inputTypes.put("Big Table", AnnotatorInputTableCodec.class);*/ } @Test @@ -88,22 +83,23 @@ public class IndexPerformanceTests extends BaseTest { } catch (IOException e) { Assert.fail("Unable to open file testOutput.txt"); } - writer.println("name,index,createTime,seekTime,thousandPerThousand,record_count,index_size"); - writer2.println("name,index,createTime,seekTime,thousandPerThousand,record_count,index_size"); + writer.println("name,index,binSize,createTime,seekTime,thousandPerThousand,record_count,index_size"); + writer2.println("name,index,binSize,createTime,seekTime,thousandPerThousand,record_count,index_size"); for (String name : inputFiles.keySet()) { - System.err.println("running " + name + " with linear index"); - printTestLine(name,true); - System.err.println("running " + name + " with tree index"); - printTestLine(name,false); + for (int size : binSizes) { + System.err.println("running " + name + " with bin size " + size); + printTestLine(name, true, size); + printTestLine(name, false, size); + } } writer.close(); writer2.close(); } - private void printTestLine(String name, boolean useLinear) { + private void printTestLine(String name, boolean useLinear, int size) { PrintWriter wr = (useLinear) ? writer : writer2; - List values = performIndexTest(name,useLinear); - wr.print(name + "," + ((useLinear) ? "linear" : "tree")); + List values = performIndexTest(name,useLinear, size); + wr.print(name + "," + ((useLinear) ? "linear" : "tree") + "," + size); for (Long l : values) { wr.print(","); wr.print(l); @@ -115,16 +111,18 @@ public class IndexPerformanceTests extends BaseTest { * time various tasks using the specified index * @param name the name to get * @return a five-piece: the time to create the index, the time to seek to chromosome 1, and the time to process reading - * every other 1000 bases of chr1 (of the first 100M), the count of records seen in the last oepration, and the index size + * every other 1000 bases of chr1 (of the first 100M), the count of records seen in the last operation, and the index size */ - public List performIndexTest(String name, boolean useLinear) { + public List performIndexTest(String name, boolean useLinear, int size) { TribbleRMDTrackBuilder.useLinearIndex = useLinear; + TribbleRMDTrackBuilder.binSize = size; + deleteIndex(inputFiles.get(name)); // time creating the index long createTime = System.currentTimeMillis(); Pair pairing = builder.createFeatureReader(inputTypes.get(name),inputFiles.get(name)); createTime = System.currentTimeMillis() - createTime; - System.err.println("index creation took " + createTime); + //System.err.println("index creation took " + createTime); // seek to chr1 long seekTo1 = seekToChr1(pairing); @@ -145,7 +143,7 @@ public class IndexPerformanceTests extends BaseTest { Assert.fail("Unable to load file for query!!"); } thousandEveryThousand = System.currentTimeMillis() - thousandEveryThousand; - System.err.println("thousand every thousand (for first million) took " + thousandEveryThousand); + //System.err.println("thousand every thousand (for first million) took " + thousandEveryThousand); return Arrays.asList(createTime,seekTo1,thousandEveryThousand,count,new File(inputFiles.get(name) + ".idx").length()); } @@ -158,10 +156,43 @@ public class IndexPerformanceTests extends BaseTest { Assert.fail("Unable to load file for query!!"); } seekTo1 = System.currentTimeMillis() - seekTo1; - System.err.println("seeking to chr1 took " + seekTo1); + //System.err.println("seeking to chr1 took " + seekTo1); return seekTo1; } + //@Test + public void testBigTable() { + File bigTable = new File("/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/slowAnnotator/big.table.txt"); + TribbleRMDTrackBuilder.useLinearIndex = false; + TribbleRMDTrackBuilder.binSize = 1000; + + deleteIndex(inputFiles.get("Big Table")); + // time creating the index + logger.warn("creating index"); + long createTime = System.currentTimeMillis(); + Pair pairing = builder.createFeatureReader(inputTypes.get("Big Table"),inputFiles.get("Big Table")); + createTime = System.currentTimeMillis() - createTime; + //System.err.println("index creation took " + createTime); + PrintWriter stream = null; + logger.warn("reading and writing"); + try { + stream = new PrintWriter(new File("bigTable.out.tree")); + } catch (FileNotFoundException e) { + Assert.fail("Fail!!!"); + } + try { + for (int x = 1; x < 200000; x = x + 1000) { + CloseableTribbleIterator iter = pairing.first.query("chr1", x, x+1000); // query + for (Feature feat : iter) { + stream.println(((AnnotatorInputTableFeature)feat).toString()); + } + } + } catch (IOException e) { + Assert.fail("Unable to load file for query!!"); + } + stream.close(); + } + private void deleteIndex(File fl) { File indexFile = new File(fl + TribbleRMDTrackBuilder.indexExtension); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 96570a6a6..b6614f44e 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -77,9 +77,9 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( - baseTestString(" -B NA19240_BGI,VCF4,"+validationDataLocation+"NA19240.BGI.RG.vcf" + - " -B NA19240_ILLUMINA,VCF4,"+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + - " -B NA19240_WUGSC,VCF4,"+validationDataLocation+"NA19240.WUGSC.RG.vcf" + + baseTestString(" -B NA19240_BGI,VCF,"+validationDataLocation+"NA19240.BGI.RG.vcf" + + " -B NA19240_ILLUMINA,VCF,"+validationDataLocation+"NA19240.ILLUMINA.RG.vcf" + + " -B NA19240_WUGSC,VCF,"+validationDataLocation+"NA19240.WUGSC.RG.vcf" + " -B denovoInfo,VCF,"+validationDataLocation+"yri_merged_validation_data_240610.annotated.b36.vcf" + " -setKey centerSet" + " -variantMergeOptions UNION" + diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java index 9c2a25f93..332f531f5 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsUnitTest.java @@ -1,8 +1,8 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import org.broad.tribble.vcf.VCFCodec; import org.broad.tribble.vcf.VCFHeader; import org.broad.tribble.vcf.VCFHeaderLine; -import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec; import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest; import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.junit.Assert; @@ -60,7 +60,7 @@ public class CombineVariantsUnitTest { }; private VCFHeader createHeader(String[] headerStr) { - VCF4Codec codec = new VCF4Codec(); + VCFCodec codec = new VCFCodec(); List headerFields = new ArrayList(); for (String str : headerStr) headerFields.add(str); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java index 77539a9ec..cd883f346 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderUnitTest.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec; import org.junit.Assert; import org.junit.Test; @@ -22,7 +21,7 @@ import java.util.List; public class VCFHeaderUnitTest extends BaseTest { private VCFHeader createHeader(String[] headerStr) { - VCF4Codec codec = new VCF4Codec(); + VCFCodec codec = new VCFCodec(); List headerFields = new ArrayList(); for (String str : headerStr) headerFields.add(str); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index 5451c5cce..be6c6a9f3 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.util.variantcontext.Allele; +import org.broad.tribble.util.variantcontext.Genotype; +import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; -import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; @@ -125,7 +125,7 @@ public class VCFWriterUnitTest extends BaseTest { genotypes.put(name,gt); } - return new VariantContext("RANDOM",loc, alleles, genotypes, 0, filters, attributes); + return new VariantContext("RANDOM",loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, 0, filters, attributes); }