From a536e1da848fa8407557f0f850c78475255e9e8f Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 29 Jan 2013 13:57:39 -0500 Subject: [PATCH] Move some VCF/VariantContext methods back to the GATK based on feedback -Moved some of the more specialized / complex VariantContext and VCF utility methods back to the GATK. -Due to this re-shuffling, was able to return things like the Pair class back to the GATK as well. --- .../gatk/walkers/annotator/RankSumTest.java | 2 +- .../annotator/TandemRepeatAnnotator.java | 6 +- .../gatk/walkers/bqsr/BaseRecalibrator.java | 2 +- .../reducereads/MultiSampleCompressor.java | 2 +- .../reducereads/SingleSampleCompressor.java | 2 +- .../reducereads/SlidingWindow.java | 2 +- .../genotyper/ConsensusAlleleCounter.java | 5 +- .../GeneralPloidyGenotypeLikelihoods.java | 2 +- ...dyGenotypeLikelihoodsCalculationModel.java | 2 +- .../genotyper/UnifiedArgumentCollection.java | 4 +- .../walkers/genotyper/UnifiedGenotyper.java | 4 +- .../genotyper/UnifiedGenotyperEngine.java | 7 +- .../genotyper/afcalc/DiploidExactAFCalc.java | 5 +- .../walkers/genotyper/afcalc/ExactAFCalc.java | 4 +- .../afcalc/GeneralPloidyExactAFCalc.java | 5 +- .../afcalc/OriginalDiploidExactAFCalc.java | 2 +- .../haplotypecaller/GenotypingEngine.java | 5 +- .../haplotypecaller/HaplotypeCaller.java | 5 +- .../gatk/walkers/indels/IndelRealigner.java | 2 +- .../walkers/phasing/PhaseByTransmission.java | 3 +- .../validation/GenotypeAndValidate.java | 4 +- .../ValidationSiteSelector.java | 4 +- .../variantutils/RegenotypeVariants.java | 3 +- .../utils/recalibration/RecalDatumNode.java | 2 +- .../sting/utils/recalibration/RecalUtils.java | 2 +- .../recalibration/RecalibrationReport.java | 2 +- .../covariates/RepeatCovariate.java | 10 +- .../RepeatUnitAndLengthCovariate.java | 12 - .../covariates/RepeatUnitCovariate.java | 14 - ...eralPloidyGenotypeLikelihoodsUnitTest.java | 2 +- .../afcalc/AFCalcPerformanceUnitTest.java | 2 +- .../StratificationManagerUnitTest.java | 2 +- .../ConcordanceMetricsUnitTest.java | 2 +- .../RepeatCovariatesUnitTest.java | 44 +- .../sting/commandline/ParsingEngine.java | 2 +- .../providers/RODMetaDataContainer.java | 2 +- .../sting/gatk/executive/Accumulator.java | 2 +- .../gatk/refdata/VariantContextAdaptors.java | 3 +- .../gatk/refdata/tracks/RMDTrackBuilder.java | 2 +- .../sting/gatk/walkers/Walker.java | 2 +- .../annotator/VariantAnnotatorEngine.java | 3 +- .../walkers/coverage/DepthOfCoverage.java | 2 +- .../walkers/coverage/GCContentByInterval.java | 2 +- .../fasta/FastaAlternateReferenceMaker.java | 2 +- .../walkers/fasta/FastaReferenceMaker.java | 2 +- .../sting/gatk/walkers/qc/CountIntervals.java | 2 +- .../sting/gatk/walkers/qc/CountRODs.java | 2 +- .../sting/gatk/walkers/qc/CountRODsByRef.java | 2 +- .../gatk/walkers/qc/CountTerminusEvent.java | 2 +- .../gatk/walkers/readutils/ClipReads.java | 2 +- .../gatk/walkers/varianteval/VariantEval.java | 5 +- .../varianteval/VariantEvalReportWriter.java | 2 +- .../stratifications/TandemRepeat.java | 4 +- .../manager/StratificationManager.java | 2 +- .../walkers/variantutils/CombineVariants.java | 23 +- .../variantutils/GenotypeConcordance.java | 2 +- .../walkers/variantutils/SelectHeaders.java | 4 +- .../walkers/variantutils/SelectVariants.java | 5 +- .../VariantValidationAssessor.java | 4 +- .../walkers/variantutils/VariantsToTable.java | 3 +- .../walkers/variantutils/VariantsToVCF.java | 2 +- .../sting/tools/CatVariants.java | 2 +- .../sting/utils/MannWhitneyU.java | 2 +- .../sting/utils/SWPairwiseAlignment.java | 2 +- .../sting/utils/SampleUtils.java | 10 +- .../broadinstitute/sting/utils/baq/BAQ.java | 2 +- .../utils/collections}/Pair.java | 2 +- .../sting/utils/duplicates/DupUtils.java | 2 +- .../sting/utils/fragments/FragmentUtils.java | 2 +- .../help/GenericDocumentationHandler.java | 2 +- .../sting/utils/interval/IntervalUtils.java | 2 +- .../sting/utils/sam/ReadUtils.java | 2 +- .../sting/utils/variant/GATKVCFUtils.java | 47 +- .../variant/GATKVariantContextUtils.java | 948 ++++++++++++++++++ .../variant/utils/BaseUtils.java | 6 +- .../variant/utils/GeneralUtils.java | 7 - .../variant/variantcontext/CommonInfo.java | 2 +- .../variantcontext/VariantContextUtils.java | 940 +---------------- .../broadinstitute/variant/vcf/VCFUtils.java | 50 - .../org/broadinstitute/sting/WalkerTest.java | 2 +- .../sting/utils/MWUnitTest.java | 2 +- .../BandPassActivityProfileUnitTest.java | 6 +- .../GATKVariantContextUtilsUnitTest.java} | 332 +++--- .../variant/VariantContextBenchmark.java | 6 +- .../variant/VariantBaseTest.java | 25 + .../VariantContextTestProvider.java | 51 +- .../VariantContextUnitTest.java | 74 -- 87 files changed, 1408 insertions(+), 1386 deletions(-) rename public/java/src/org/broadinstitute/{variant/utils => sting/utils/collections}/Pair.java (98%) rename public/java/test/org/broadinstitute/{variant/variantcontext/VariantContextUtilsUnitTest.java => sting/utils/variant/GATKVariantContextUtilsUnitTest.java} (79%) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index 6f965227c..ec107512a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -57,7 +57,7 @@ import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.variant.vcf.VCFHeaderLine; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.variant.variantcontext.Allele; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java index a6b713551..2e0e759c2 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java @@ -53,8 +53,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.variant.utils.Pair; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.vcf.VCFHeaderLineCount; import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; @@ -79,7 +79,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa if ( !vc.isIndel()) return null; - Pair,byte[]> result = VariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases()); + Pair,byte[]> result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases()); if (result == null) return null; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java index 002bdc39f..2df5fefa8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java @@ -63,7 +63,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.clipping.ReadClipper; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java index 50d741f13..6818669df 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java @@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads; import net.sf.samtools.SAMFileHeader; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.SampleUtils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java index 5b08e99a0..036d2782a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java @@ -46,7 +46,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 57a11f640..7ce606f20 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -53,7 +53,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index 90904ab29..ddf47805f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -53,7 +53,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.clipping.ReadClipper; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -288,7 +289,7 @@ public class ConsensusAlleleCounter { if (vcs.isEmpty()) return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion - final VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false); + final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(vcs, null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false); return mergedVC.getAlleles(); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java index 6a5fbce39..cf144a735 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoods.java @@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.variant.vcf.VCFConstants; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java index 23804bb23..3e0437edb 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java @@ -56,7 +56,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.variant.vcf.VCFConstants; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.variant.variantcontext.*; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index e97c92309..a7f90ebec 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -49,8 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.utils.pairhmm.PairHMM; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.VariantContext; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; public class UnifiedArgumentCollection extends StandardCallerArgumentCollection { @@ -172,7 +172,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy */ @Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false) - public int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY; + public int samplePloidy = GATKVariantContextUtils.DEFAULT_PLOIDY; @Hidden @Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 46ac10d90..d16ece4fd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -61,7 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; @@ -304,7 +304,7 @@ public class UnifiedGenotyper extends LocusWalker, Unif headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site")); // add the pool values for each genotype - if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { + if (UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY) { headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample")); headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample")); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index b1aaf8190..8f6097661 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -61,6 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -134,7 +135,7 @@ public class UnifiedGenotyperEngine { // --------------------------------------------------------------------------------------------------------- @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { - this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY); + this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), GATKVariantContextUtils.DEFAULT_PLOIDY); } @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"}) @@ -525,7 +526,7 @@ public class UnifiedGenotyperEngine { // if we are subsetting alleles (either because there were too many or because some were not polymorphic) // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync - vcCall = VariantContextUtils.reverseTrimAlleles(vcCall); + vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall); if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations @@ -662,7 +663,7 @@ public class UnifiedGenotyperEngine { private void determineGLModelsToUse() { String modelPrefix = ""; - if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY ) + if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY ) modelPrefix = GPSTRING; if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java index 68f57a300..170b6e250 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/DiploidExactAFCalc.java @@ -47,6 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; import java.util.*; @@ -105,7 +106,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { alleles.add(vc.getReference()); alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles())); builder.alleles(alleles); - builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false)); + builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, false)); return builder.make(); } else { return vc; @@ -351,6 +352,6 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc { final List allelesToUse, final boolean assignGenotypes, final int ploidy) { - return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes); + return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java index cf6b67afd..3d28db159 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactAFCalc.java @@ -47,10 +47,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.GenotypesContext; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.ArrayList; @@ -92,7 +92,7 @@ abstract class ExactAFCalc extends AFCalc { if ( sample.hasLikelihoods() ) { double[] gls = sample.getLikelihoods().getAsVector(); - if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL ) + if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) genotypeLikelihoods.add(gls); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java index 1e1652c68..f8c364e82 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/GeneralPloidyExactAFCalc.java @@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.variant.variantcontext.*; @@ -553,7 +554,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { } // if there is no mass on the (new) likelihoods, then just no-call the sample - if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) { + if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) { newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); } else { @@ -565,7 +566,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc { gb.PL(newLikelihoods); // if we weren't asked to assign a genotype, then just no-call the sample - if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) + if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) gb.alleles(NO_CALL_ALLELES); else assignGenotype(gb, newLikelihoods, allelesToUse, ploidy); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java index 92305fe4b..325d3b722 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java @@ -47,7 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.VariantContext; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index fa15eccdf..9aeffe966 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.variantcontext.*; @@ -173,7 +174,7 @@ public class GenotypingEngine { validatePriorityList( priorityList, eventsAtThisLoc ); // Merge the event to find a common reference representation - final VariantContext mergedVC = VariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); + final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); if( mergedVC == null ) { continue; } if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { @@ -203,7 +204,7 @@ public class GenotypingEngine { VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call); if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary! - annotatedCall = VariantContextUtils.reverseTrimAlleles(annotatedCall); + annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall); } returnCalls.add( annotatedCall ); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 455809a17..8b3eb9f1b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -72,6 +72,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.clipping.ReadClipper; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; @@ -297,7 +298,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem samplesList.addAll( samples ); // initialize the UnifiedGenotyper Engine which is used to call into the exact model final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user - UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC); @@ -307,7 +308,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling simpleUAC.CONTAMINATION_FRACTION = 0.0; simpleUAC.exactCallsLog = null; - UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); // initialize the output VCF header annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 1865cadea..851703648 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -63,7 +63,7 @@ import org.broadinstitute.sting.gatk.walkers.BAQMode; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 4510dfe55..80c49ff19 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -59,6 +59,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; @@ -444,7 +445,7 @@ public class PhaseByTransmission extends RodWalker, HashMa ArrayList rodNames = new ArrayList(); rodNames.add(variantCollection.variants.getName()); Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); - Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + Set vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); //Get the trios from the families passed as ped setTrios(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java index 927e1e607..f0efb3cd9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/GenotypeAndValidate.java @@ -58,12 +58,12 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContextBuilder; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.vcf.VCFUtils; @@ -327,7 +327,7 @@ public class GenotypeAndValidate extends RodWalker header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName()); - samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + samples = SampleUtils.getSampleList(header, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); Set headerLines = VCFUtils.smartMergeHeaders(header.values(), true); headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate")); vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java index dcd7cd67b..ce44f546d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/validationsiteselector/ValidationSiteSelector.java @@ -54,12 +54,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.variant.variantcontext.VariantContext; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import java.io.File; @@ -227,7 +227,7 @@ public class ValidationSiteSelector extends RodWalker { public void initialize() { // Get list of samples to include in the output Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit()); - TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); + TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); Collection samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); Collection samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java index 74ab8f073..c8fc27e6a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java @@ -61,6 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.vcf.*; @@ -115,7 +116,7 @@ public class RegenotypeVariants extends RodWalker implements T String trackName = variantCollection.variants.getName(); Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); - UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); + UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); final Set hInfo = new HashSet(); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName))); diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java index 9122c9ab6..637d9fb2d 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatumNode.java @@ -51,7 +51,7 @@ import com.google.java.contract.Requires; import org.apache.commons.math.MathException; import org.apache.commons.math.stat.inference.ChiSquareTestImpl; import org.apache.log4j.Logger; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.Collection; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java index e44f2e06e..699f26c5e 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalUtils.java @@ -58,7 +58,7 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedHashMap; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java index e3ab16639..2f9a38972 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/RecalibrationReport.java @@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.NestedIntegerArray; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatCovariate.java index a836fbb5e..546bd6ac8 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatCovariate.java @@ -51,9 +51,9 @@ import com.google.java.contract.Requires; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.utils.BaseUtils; -import org.broadinstitute.variant.utils.Pair; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.collections.Pair; import java.util.Arrays; import java.util.HashMap; @@ -112,7 +112,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate { // get backward repeat unit and # repeats byte[] backwardRepeatUnit = Arrays.copyOfRange(readBases, offset - str + 1, offset + 1); - maxBW = VariantContextUtils.findNumberofRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false); + maxBW = GATKVariantContextUtils.findNumberofRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false); if (maxBW > 1) { bestBWRepeatUnit = backwardRepeatUnit.clone(); break; @@ -132,7 +132,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate { // get forward repeat unit and # repeats byte[] forwardRepeatUnit = Arrays.copyOfRange(readBases, offset +1, offset+str+1); - maxFW = VariantContextUtils.findNumberofRepetitions(forwardRepeatUnit,Arrays.copyOfRange(readBases, offset+1, readBases.length), true); + maxFW = GATKVariantContextUtils.findNumberofRepetitions(forwardRepeatUnit, Arrays.copyOfRange(readBases, offset + 1, readBases.length), true); if (maxFW > 1) { bestFWRepeatUnit = forwardRepeatUnit.clone(); break; @@ -150,7 +150,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate { // but correct representation at that place might be (C)4. // Hence, if the FW and BW units don't match, check if BW unit can still be a part of FW unit and add // representations to total - maxBW = VariantContextUtils.findNumberofRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false); + maxBW = GATKVariantContextUtils.findNumberofRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false); maxRL = maxFW + maxBW; bestRepeatUnit = bestFWRepeatUnit; diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatUnitAndLengthCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatUnitAndLengthCovariate.java index 5822b9e05..c4fdaad8b 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatUnitAndLengthCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatUnitAndLengthCovariate.java @@ -48,18 +48,6 @@ package org.broadinstitute.sting.utils.recalibration.covariates; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; -import org.broadinstitute.sting.utils.recalibration.ReadCovariates; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; -import org.broadinstitute.variant.utils.BaseUtils; -import org.broadinstitute.variant.utils.Pair; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; public class RepeatUnitAndLengthCovariate extends RepeatCovariate { diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatUnitCovariate.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatUnitCovariate.java index ed843310d..b32feb9a3 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatUnitCovariate.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatUnitCovariate.java @@ -46,20 +46,6 @@ package org.broadinstitute.sting.utils.recalibration.covariates; -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; -import org.broadinstitute.sting.utils.recalibration.ReadCovariates; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.variant.utils.BaseUtils; -import org.broadinstitute.variant.utils.Pair; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - /** * Created with IntelliJ IDEA. * User: rpoplin diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java index 08d333f8b..14dedebc4 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsUnitTest.java @@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.variant.variantcontext.*; import org.testng.Assert; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java index 291489984..8deddc357 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceUnitTest.java @@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.VariantContext; import org.testng.Assert; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManagerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManagerUnitTest.java index f144f9b59..aabcd374d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManagerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManagerUnitTest.java @@ -52,7 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java index 300dd633d..9c0567464 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java @@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.variantcontext.Allele; diff --git a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RepeatCovariatesUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RepeatCovariatesUnitTest.java index e4311a534..7ded176bb 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/recalibration/RepeatCovariatesUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RepeatCovariatesUnitTest.java @@ -46,21 +46,17 @@ package org.broadinstitute.sting.utils.recalibration; import com.google.java.contract.Requires; -import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.utils.BaseUtils; -import org.broadinstitute.variant.utils.Pair; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.utils.collections.Pair; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.Random; @@ -89,38 +85,38 @@ public class RepeatCovariatesUnitTest { @Test(enabled = true) public void testFindNumberOfRepetitions() { // First, test logic to compute number of repetitions of a substring on a given string. - int result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), true); + int result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), true); Assert.assertEquals(2,result); - result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACAC".getBytes(), true); + result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), true); Assert.assertEquals(4,result); - result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACACGT".getBytes(), true); + result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), true); Assert.assertEquals(4,result); - result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"GTACACACAC".getBytes(), true); + result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), true); Assert.assertEquals(0,result); - result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGT".getBytes(), true); + result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), true); Assert.assertEquals(0,result); - result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GCAGCAGTAGGGTGTACACACAC".getBytes(), true); + result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), true); Assert.assertEquals(1,result); - result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GTAGGGTGTACACACACGCAGCAT".getBytes(), true); + result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), true); Assert.assertEquals(0,result); - result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true); + result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true); Assert.assertEquals(0,result); // Same tests but looking backward on string - result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACAC".getBytes(), false); + result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), false); Assert.assertEquals(2,result); - result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACAC".getBytes(), false); + result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), false); Assert.assertEquals(4,result); - result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACACGT".getBytes(), false); + result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), false); Assert.assertEquals(0,result); - result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"GTACACACAC".getBytes(), false); + result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), false); Assert.assertEquals(4,result); - result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGT".getBytes(), false); + result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), false); Assert.assertEquals(0,result); - result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GCAGCAGTAGGGTGTACACACAC".getBytes(), false); + result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), false); Assert.assertEquals(0,result); - result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GTAGGGTGTACACACACGCAGCAT".getBytes(), false); + result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), false); Assert.assertEquals(0,result); - result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false); + result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false); Assert.assertEquals(3,result); // test logic to get repeat unit and number of repeats from covariate value @@ -208,8 +204,8 @@ public class RepeatCovariatesUnitTest { Assert.assertEquals(rurlValM,rurlValI); - int fw = VariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(offset+1,readLength).getBytes(),true); - int bw = VariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(0,offset+1).getBytes(),false); + int fw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(offset+1,readLength).getBytes(),true); + int bw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(0,offset+1).getBytes(),false); Assert.assertEquals(Math.min(fw+bw,RepeatCovariate.MAX_REPEAT_LENGTH),(int)Integer.valueOf(rlValM)); } diff --git a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java index 071bd2cad..5e863f4f7 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java +++ b/public/java/src/org/broadinstitute/sting/commandline/ParsingEngine.java @@ -31,7 +31,7 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.ApplicationDetails; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RODMetaDataContainer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RODMetaDataContainer.java index b7c824360..e078e678b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RODMetaDataContainer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/RODMetaDataContainer.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java b/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java index ea83aab53..d0ba0fa21 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/executive/Accumulator.java @@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index a77341a5d..09f053187 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; import java.util.*; @@ -200,7 +201,7 @@ public class VariantContextAdaptors { if ( isSNP(dbsnp) || isMNP(dbsnp) ) addPaddingBase = false; else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) - addPaddingBase = refBaseIsDash || VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp))); + addPaddingBase = refBaseIsDash || GATKVariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp))); else return null; // can't handle anything else diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index ec51b2f53..c5f87d625 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.file.FSLockWithShared; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 979cc2fbf..522414c00 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.baq.BAQ; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.recalibration.BQSRMode; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 29f6ed388..c5a6fd624 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.variant.variantcontext.*; @@ -249,7 +250,7 @@ public class VariantAnnotatorEngine { private VariantContext annotateDBs(final RefMetaDataTracker tracker, final GenomeLoc loc, VariantContext vc, final Map infoAnnotations) { for ( Map.Entry, String> dbSet : dbAnnotations.entrySet() ) { if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) { - final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType()); + final String rsID = GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType()); // add the ID if appropriate if ( rsID != null ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java index 383b22295..dbb8ed5a6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java @@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java index 77a4af1cd..668d3fd5f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/GCContentByInterval.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java index ac6d82375..8a5b3530e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.variant.variantcontext.VariantContext; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java index d12ad3183..ed3ebe173 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java index 82247d160..0423c6f0a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountIntervals.java @@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java index be1e264c6..a0f943f7e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java @@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java index 7135bffce..77490be93 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODsByRef.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.util.Collections; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java index 4e05033ec..cabc2f467 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountTerminusEvent.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java index 6b2c0f75c..fe2b75464 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/readutils/ClipReads.java @@ -45,7 +45,7 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.clipping.ClippingOp; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ReadClipper; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java index 9f758706e..e24c725a6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java @@ -49,6 +49,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.VariantEvalUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -197,7 +198,7 @@ public class VariantEval extends RodWalker implements TreeRedu protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; @Argument(shortName="ploidy", fullName="samplePloidy", doc="Per-sample ploidy (number of chromosomes per sample)", required=false) - protected int ploidy = VariantContextUtils.DEFAULT_PLOIDY; + protected int ploidy = GATKVariantContextUtils.DEFAULT_PLOIDY; @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) private File ancestralAlignmentsFile = null; @@ -285,7 +286,7 @@ public class VariantEval extends RodWalker implements TreeRedu // Now that we have all the rods categorized, determine the sample list from the eval rods. Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), evals); - Set vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); + Set vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); // Load the sample list, using an intermediate tree set to sort the samples final Set allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java index 5c16c7385..a63f32485 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java index 5ef414b00..de82b18cc 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java @@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.VariantContext; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.Arrays; import java.util.List; @@ -51,7 +51,7 @@ public class TandemRepeat extends VariantStratifier { public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { if ( eval == null || ! eval.isIndel() ) return ALL; - else if ( VariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) { + else if ( GATKVariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) { print("REPEAT", eval, ref); return REPEAT; } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManager.java index d792e4c67..681d32f2d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManager.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index c54a57385..d0d6a68a8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; @@ -135,14 +136,14 @@ public class CombineVariants extends RodWalker implements Tree protected VariantContextWriter vcfWriter = null; @Argument(shortName="genotypeMergeOptions", doc="Determines how we should merge genotype records for samples shared across the ROD files", required=false) - public VariantContextUtils.GenotypeMergeType genotypeMergeOption = null; + public GATKVariantContextUtils.GenotypeMergeType genotypeMergeOption = null; @Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false) - public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED; + public GATKVariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED; @Hidden @Argument(shortName="multipleAllelesMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required=false) - public VariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = VariantContextUtils.MultipleAllelesMergeType.BY_TYPE; + public GATKVariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE; /** * Used when taking the union of variants that contain genotypes. A complete priority list MUST be provided. @@ -203,12 +204,12 @@ public class CombineVariants extends RodWalker implements Tree validateAnnotateUnionArguments(); if ( PRIORITY_STRING == null && genotypeMergeOption == null) { - genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED; + genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED; //PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12) logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority); } - if (genotypeMergeOption == VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE && + if (genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE && !SampleUtils.verifyUniqueSamplesNames(vcfRods)) throw new IllegalStateException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered."); @@ -232,7 +233,7 @@ public class CombineVariants extends RodWalker implements Tree private void validateAnnotateUnionArguments() { Set rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null); - if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null ) + if ( genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null ) throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes"); if ( PRIORITY_STRING != null){ @@ -278,7 +279,7 @@ public class CombineVariants extends RodWalker implements Tree List mergedVCs = new ArrayList(); - if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) { + if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) { Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); // TODO -- clean this up in a refactoring @@ -296,13 +297,13 @@ public class CombineVariants extends RodWalker implements Tree // iterate over the types so that it's deterministic for (VariantContext.Type type : VariantContext.Type.values()) { if (VCsByType.containsKey(type)) - mergedVCs.add(VariantContextUtils.simpleMerge(VCsByType.get(type), - priority, rodNames.size() , filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, + mergedVCs.add(GATKVariantContextUtils.simpleMerge(VCsByType.get(type), + priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } } - else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) { - mergedVCs.add(VariantContextUtils.simpleMerge(vcs, + else if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) { + mergedVCs.add(GATKVariantContextUtils.simpleMerge(vcs, priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 8d5e7c2b8..048c7ef77 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.walkers.RodWalker; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.vcf.VCFHeader; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java index e94a771d3..e4d182d13 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java @@ -39,12 +39,12 @@ import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.sting.utils.text.ListFileUtils; import org.broadinstitute.variant.variantcontext.VariantContext; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.io.File; import java.util.*; @@ -204,7 +204,7 @@ public class SelectHeaders extends RodWalker implements TreeRe } } - TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); + TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); VCFHeader vcfHeader = new VCFHeader(headerLines, vcfSamples); vcfHeader.setWriteEngineHeaders(includeEngineHeaders); vcfWriter.writeHeader(vcfHeader); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index dfe604a7d..4d30408d8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; @@ -337,7 +338,7 @@ public class SelectVariants extends RodWalker implements TreeR List rodNames = Arrays.asList(variantCollection.variants.getName()); vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); - TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); + TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); Collection samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); Collection samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions); @@ -661,7 +662,7 @@ public class SelectVariants extends RodWalker implements TreeR // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate) if ( vc.getAlleles().size() != sub.getAlleles().size() ) - newGC = VariantContextUtils.stripPLsAndAD(sub.getGenotypes()); + newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes()); // if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags if ( vc.getNSamples() != sub.getNSamples() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 17d5ab1b1..5bf5b96e3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -35,13 +35,13 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContextBuilder; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.util.*; @@ -256,7 +256,7 @@ public class VariantValidationAssessor extends RodWalker //if ( popFile != null ) { // throw new StingException("We still need to implement this!"); //} else { - return VariantContextUtils.computeHardyWeinbergPvalue(vc); + return GATKVariantContextUtils.computeHardyWeinbergPvalue(vc); //} } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java index 91057c812..1ea85df47 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; @@ -180,7 +181,7 @@ public class VariantsToTable extends RodWalker { if ( !genotypeFieldsToTake.isEmpty() ) { Map vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants); - TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); + TreeSet vcfSamples = new TreeSet(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); samples.addAll(vcfSamples); // optimization: if there are no samples, we don't have to worry about any genotype fields diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index 61746cbf1..5afeccffe 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -119,7 +119,7 @@ public class VariantsToVCF extends RodWalker { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) return 0; - String rsID = dbsnp == null ? null : VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP); + String rsID = dbsnp == null ? null : GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP); Collection contexts = getVariantContexts(tracker, ref); diff --git a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java index 93246fd6f..10fb606f9 100644 --- a/public/java/src/org/broadinstitute/sting/tools/CatVariants.java +++ b/public/java/src/org/broadinstitute/sting/tools/CatVariants.java @@ -36,7 +36,7 @@ import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.variant.bcf2.BCF2Codec; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.exceptions.UserException; diff --git a/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java b/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java index 05468c6c2..74009682a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java +++ b/public/java/src/org/broadinstitute/sting/utils/MannWhitneyU.java @@ -33,7 +33,7 @@ import org.apache.commons.math.MathException; import org.apache.commons.math.distribution.NormalDistribution; import org.apache.commons.math.distribution.NormalDistributionImpl; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; import java.io.Serializable; diff --git a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java index b316d1117..e2edf7421 100644 --- a/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/SWPairwiseAlignment.java @@ -28,7 +28,7 @@ package org.broadinstitute.sting.utils; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.StingException; import java.util.*; diff --git a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java index f158308b4..b1de89dd8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/SampleUtils.java @@ -29,11 +29,11 @@ import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.sting.utils.text.ListFileUtils; import org.broadinstitute.sting.utils.text.XReadLines; -import org.broadinstitute.variant.variantcontext.VariantContextUtils; import java.io.File; import java.io.FileNotFoundException; @@ -117,15 +117,15 @@ public class SampleUtils { } public static Set getSampleList(Map headers) { - return getSampleList(headers, VariantContextUtils.GenotypeMergeType.PRIORITIZE); + return getSampleList(headers, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE); } - public static Set getSampleList(Map headers, VariantContextUtils.GenotypeMergeType mergeOption) { + public static Set getSampleList(Map headers, GATKVariantContextUtils.GenotypeMergeType mergeOption) { Set samples = new TreeSet(); for ( Map.Entry val : headers.entrySet() ) { VCFHeader header = val.getValue(); for ( String sample : header.getGenotypeSamples() ) { - samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == VariantContextUtils.GenotypeMergeType.UNIQUIFY)); + samples.add(GATKVariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY)); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java index 794caa315..8c7bce6ac 100644 --- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java +++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java @@ -32,7 +32,7 @@ import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMUtils; import org.apache.log4j.Logger; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.ReadUtils; diff --git a/public/java/src/org/broadinstitute/variant/utils/Pair.java b/public/java/src/org/broadinstitute/sting/utils/collections/Pair.java similarity index 98% rename from public/java/src/org/broadinstitute/variant/utils/Pair.java rename to public/java/src/org/broadinstitute/sting/utils/collections/Pair.java index 858d5fbd7..4c00331a9 100644 --- a/public/java/src/org/broadinstitute/variant/utils/Pair.java +++ b/public/java/src/org/broadinstitute/sting/utils/collections/Pair.java @@ -23,7 +23,7 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.variant.utils; +package org.broadinstitute.sting.utils.collections; public class Pair { diff --git a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index 1072fb1b7..39f5b06c6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -29,7 +29,7 @@ import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index 00dd037db..76ccede62 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -30,7 +30,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.recalibration.EventType; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 099554a2c..bb0dc670b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -37,7 +37,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index 403e166c5..7374dda14 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 46eceefd5..29f8c8dcd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.NGSPlatform; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.variant.utils.BaseUtils; diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java index b2069c7ee..cbc7c01ed 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVCFUtils.java @@ -31,7 +31,7 @@ import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.vcf.*; @@ -147,4 +147,49 @@ public class GATKVCFUtils { return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary()); } + public static String rsIDOfFirstRealVariant(List VCs, VariantContext.Type type) { + if ( VCs == null ) + return null; + + String rsID = null; + for ( VariantContext vc : VCs ) { + if ( vc.getType() == type ) { + rsID = vc.getID(); + break; + } + } + + return rsID; + } + + /** + * Read all of the VCF records from source into memory, returning the header and the VariantContexts + * + * SHOULD ONLY BE USED FOR UNIT/INTEGRATION TESTING PURPOSES! + * + * @param source the file to read, must be in VCF4 format + * @return + * @throws java.io.IOException + */ + public static Pair> readVCF(final File source) throws IOException { + // read in the features + final List vcs = new ArrayList(); + final VCFCodec codec = new VCFCodec(); + PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source)); + FeatureCodecHeader header = codec.readHeader(pbs); + pbs.close(); + + pbs = new PositionalBufferedStream(new FileInputStream(source)); + pbs.skip(header.getHeaderEnd()); + + final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue(); + + while ( ! pbs.isDone() ) { + final VariantContext vc = codec.decode(pbs); + if ( vc != null ) + vcs.add(vc); + } + + return new Pair>(vcfHeader, vcs); + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java index 82241ad55..2ae289214 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java @@ -25,12 +25,79 @@ package org.broadinstitute.sting.utils.variant; +import com.google.java.contract.Requires; +import org.apache.commons.lang.ArrayUtils; +import org.apache.log4j.Logger; +import org.broad.tribble.TribbleException; +import org.broad.tribble.util.popgen.HardyWeinbergCalculation; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.vcf.VCFConstants; + +import java.io.Serializable; +import java.util.*; public class GATKVariantContextUtils { + private static Logger logger = Logger.getLogger(GATKVariantContextUtils.class); + + public static final int DEFAULT_PLOIDY = 2; + public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. + private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + public final static String MERGE_FILTER_PREFIX = "filterIn"; + public final static String MERGE_REF_IN_ALL = "ReferenceInAll"; + public final static String MERGE_FILTER_IN_ALL = "FilteredInAll"; + public final static String MERGE_INTERSECTION = "Intersection"; + + public enum GenotypeMergeType { + /** + * Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD. + */ + UNIQUIFY, + /** + * Take genotypes in priority order (see the priority argument). + */ + PRIORITIZE, + /** + * Take the genotypes in any order. + */ + UNSORTED, + /** + * Require that all samples/genotypes be unique between all inputs. + */ + REQUIRE_UNIQUE + } + + public enum FilteredRecordMergeType { + /** + * Union - leaves the record if any record is unfiltered. + */ + KEEP_IF_ANY_UNFILTERED, + /** + * Requires all records present at site to be unfiltered. VCF files that don't contain the record don't influence this. + */ + KEEP_IF_ALL_UNFILTERED, + /** + * If any record is present at this site (regardless of possibly being filtered), then all such records are kept and the filters are reset. + */ + KEEP_UNCONDITIONAL + } + + public enum MultipleAllelesMergeType { + /** + * Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record. + */ + BY_TYPE, + /** + * Merge all allele types at the same start position into the same VCF record. + */ + MIX_TYPES + } + /** * create a genome location, given a variant context * @param genomeLocParser parser @@ -41,4 +108,885 @@ public class GATKVariantContextUtils { return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true); } + /** + * Returns true iff VC is an non-complex indel where every allele represents an expansion or + * contraction of a series of identical bases in the reference. + * + * For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT + * + * If VC = -/CT, then this function returns true because the CT insertion matches exactly the + * upcoming reference. + * If VC = -/CTA then this function returns false because the CTA isn't a perfect match + * + * Now consider deletions: + * + * If VC = CT/- then again the same logic applies and this returns true + * The case of CTA/- makes no sense because it doesn't actually match the reference bases. + * + * The logic of this function is pretty simple. Take all of the non-null alleles in VC. For + * each insertion allele of n bases, check if that allele matches the next n reference bases. + * For each deletion allele of n bases, check if this matches the reference bases at n - 2 n, + * as it must necessarily match the first n bases. If this test returns true for all + * alleles you are a tandem repeat, otherwise you are not. + * + * @param vc + * @param refBasesStartingAtVCWithPad not this is assumed to include the PADDED reference + * @return + */ + @Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"}) + public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) { + final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1); + if ( ! vc.isIndel() ) // only indels are tandem repeats + return false; + + final Allele ref = vc.getReference(); + + for ( final Allele allele : vc.getAlternateAlleles() ) { + if ( ! isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad) ) + return false; + } + + // we've passed all of the tests, so we are a repeat + return true; + } + + /** + * + * @param vc + * @param refBasesStartingAtVCWithPad + * @return + */ + @Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"}) + public static Pair,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) { + final boolean VERBOSE = false; + final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1); + if ( ! vc.isIndel() ) // only indels are tandem repeats + return null; + + final Allele refAllele = vc.getReference(); + final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length()); + + byte[] repeatUnit = null; + final ArrayList lengths = new ArrayList(); + + for ( final Allele allele : vc.getAlternateAlleles() ) { + Pair result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes()); + + final int[] repetitionCount = result.first; + // repetition count = 0 means allele is not a tandem expansion of context + if (repetitionCount[0] == 0 || repetitionCount[1] == 0) + return null; + + if (lengths.size() == 0) { + lengths.add(repetitionCount[0]); // add ref allele length only once + } + lengths.add(repetitionCount[1]); // add this alt allele's length + + repeatUnit = result.second; + if (VERBOSE) { + System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad); + System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0])); + System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1])); + System.out.println("RU:"+new String(repeatUnit)); + } + } + + return new Pair, byte[]>(lengths,repeatUnit); + } + + public static Pair getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) { + /* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units. + Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)2. + */ + + byte[] longB; + // find first repeat unit based on either ref or alt, whichever is longer + if (altBases.length > refBases.length) + longB = altBases; + else + longB = refBases; + + // see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units + // for example, -*,CACA needs to first be decomposed into (CA)2 + final int repeatUnitLength = findRepeatedSubstring(longB); + final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength); + + final int[] repetitionCount = new int[2]; + // look for repetitions forward on the ref bases (i.e. starting at beginning of ref bases) + int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases, true); + repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext), true)-repetitionsInRef; + repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext), true)-repetitionsInRef; + + return new Pair(repetitionCount, repeatUnit); + + } + + /** + * Find out if a string can be represented as a tandem number of substrings. + * For example ACTACT is a 2-tandem of ACT, + * but ACTACA is not. + * + * @param bases String to be tested + * @return Length of repeat unit, if string can be represented as tandem of substring (if it can't + * be represented as one, it will be just the length of the input string) + */ + public static int findRepeatedSubstring(byte[] bases) { + + int repLength; + for (repLength=1; repLength <=bases.length; repLength++) { + final byte[] candidateRepeatUnit = Arrays.copyOf(bases,repLength); + boolean allBasesMatch = true; + for (int start = repLength; start < bases.length; start += repLength ) { + // check that remaining of string is exactly equal to repeat unit + final byte[] basePiece = Arrays.copyOfRange(bases,start,start+candidateRepeatUnit.length); + if (!Arrays.equals(candidateRepeatUnit, basePiece)) { + allBasesMatch = false; + break; + } + } + if (allBasesMatch) + return repLength; + } + + return repLength; + } + + /** + * Helper routine that finds number of repetitions a string consists of. + * For example, for string ATAT and repeat unit AT, number of repetitions = 2 + * @param repeatUnit Substring + * @param testString String to test + * @oaram lookForward Look for repetitions forward (at beginning of string) or backward (at end of string) + * @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's + */ + public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString, boolean lookForward) { + int numRepeats = 0; + if (lookForward) { + // look forward on the test string + for (int start = 0; start < testString.length; start += repeatUnit.length) { + int end = start + repeatUnit.length; + byte[] unit = Arrays.copyOfRange(testString,start, end); + if(Arrays.equals(unit,repeatUnit)) + numRepeats++; + else + break; + } + return numRepeats; + } + + // look backward. For example, if repeatUnit = AT and testString = GATAT, number of repeat units is still 2 + // look forward on the test string + for (int start = testString.length - repeatUnit.length; start >= 0; start -= repeatUnit.length) { + int end = start + repeatUnit.length; + byte[] unit = Arrays.copyOfRange(testString,start, end); + if(Arrays.equals(unit,repeatUnit)) + numRepeats++; + else + break; + } + return numRepeats; + } + + /** + * Helper function for isTandemRepeat that checks that allele matches somewhere on the reference + * @param ref + * @param alt + * @param refBasesStartingAtVCWithoutPad + * @return + */ + protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) { + if ( ! Allele.oneIsPrefixOfOther(ref, alt) ) + return false; // we require one allele be a prefix of another + + if ( ref.length() > alt.length() ) { // we are a deletion + return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2); + } else { // we are an insertion + return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1); + } + } + + protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) { + final String potentialRepeat = l.substring(s.length()); // skip s bases + + for ( int i = 0; i < minNumberOfMatches; i++) { + final int start = i * potentialRepeat.length(); + final int end = (i+1) * potentialRepeat.length(); + if ( ref.length() < end ) + return false; // we ran out of bases to test + final String refSub = ref.substring(start, end); + if ( ! refSub.equals(potentialRepeat) ) + return false; // repeat didn't match, fail + } + + return true; // we passed all tests, we matched + } + + /** + * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately) + * + * @param vc variant context with genotype likelihoods + * @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC *** + * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs + * @return genotypes + */ + public static GenotypesContext subsetDiploidAlleles(final VariantContext vc, + final List allelesToUse, + final boolean assignGenotypes) { + + // the genotypes with PLs + final GenotypesContext oldGTs = vc.getGenotypes(); + + // samples + final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); + + // the new genotypes to create + final GenotypesContext newGTs = GenotypesContext.create(); + + // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward + final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); + final int numNewAltAlleles = allelesToUse.size() - 1; + + // which PLs should be carried forward? + ArrayList likelihoodIndexesToUse = null; + + // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, + // then we can keep the PLs as is; otherwise, we determine which ones to keep + if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { + likelihoodIndexesToUse = new ArrayList(30); + + final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles]; + for ( int i = 0; i < numOriginalAltAlleles; i++ ) { + if ( allelesToUse.contains(vc.getAlternateAllele(i)) ) + altAlleleIndexToUse[i] = true; + } + + // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2 + final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY); + for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) { + final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); + // consider this entry only if both of the alleles are good + if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) ) + likelihoodIndexesToUse.add(PLindex); + } + } + + // create the new genotypes + for ( int k = 0; k < oldGTs.size(); k++ ) { + final Genotype g = oldGTs.get(sampleIndices.get(k)); + if ( !g.hasLikelihoods() ) { + newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + continue; + } + + // create the new likelihoods array from the alleles we are allowed to use + final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); + double[] newLikelihoods; + if ( likelihoodIndexesToUse == null ) { + newLikelihoods = originalLikelihoods; + } else { + newLikelihoods = new double[likelihoodIndexesToUse.size()]; + int newIndex = 0; + for ( int oldIndex : likelihoodIndexesToUse ) + newLikelihoods[newIndex++] = originalLikelihoods[oldIndex]; + + // might need to re-normalize + newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); + } + + // if there is no mass on the (new) likelihoods, then just no-call the sample + if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { + newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); + } + else { + final GenotypeBuilder gb = new GenotypeBuilder(g); + + if ( numNewAltAlleles == 0 ) + gb.noPL(); + else + gb.PL(newLikelihoods); + + // if we weren't asked to assign a genotype, then just no-call the sample + if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { + gb.alleles(NO_CALL_ALLELES); + } + else { + // find the genotype with maximum likelihoods + int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); + GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); + + gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2))); + if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); + } + newGTs.add(gb.make()); + } + } + + return newGTs; + } + + /** + * Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs + * + * @param vc variant context with genotype likelihoods + * @return genotypes context + */ + public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) { + return subsetDiploidAlleles(vc, vc.getAlleles(), true); + } + + /** + * Split variant context into its biallelic components if there are more than 2 alleles + * + * For VC has A/B/C alleles, returns A/B and A/C contexts. + * Genotypes are all no-calls now (it's not possible to fix them easily) + * Alleles are right trimmed to satisfy VCF conventions + * + * If vc is biallelic or non-variant it is just returned + * + * Chromosome counts are updated (but they are by definition 0) + * + * @param vc a potentially multi-allelic variant context + * @return a list of bi-allelic (or monomorphic) variant context + */ + public static List splitVariantContextToBiallelics(final VariantContext vc) { + if ( ! vc.isVariant() || vc.isBiallelic() ) + // non variant or biallelics already satisfy the contract + return Collections.singletonList(vc); + else { + final List biallelics = new LinkedList(); + + for ( final Allele alt : vc.getAlternateAlleles() ) { + VariantContextBuilder builder = new VariantContextBuilder(vc); + final List alleles = Arrays.asList(vc.getReference(), alt); + builder.alleles(alleles); + builder.genotypes(subsetDiploidAlleles(vc, alleles, false)); + VariantContextUtils.calculateChromosomeCounts(builder, true); + biallelics.add(reverseTrimAlleles(builder.make())); + } + + return biallelics; + } + } + + public static Genotype removePLsAndAD(final Genotype g) { + return ( g.hasLikelihoods() || g.hasAD() ) ? new GenotypeBuilder(g).noPL().noAD().make() : g; + } + + /** + * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. + * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with + * the sample name + * + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param setKey the key name of the set + * @param filteredAreUncalled are filtered records uncalled? + * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? + * @return new VariantContext representing the merge of unsortedVCs + */ + public static VariantContext simpleMerge(final Collection unsortedVCs, + final List priorityListOfVCs, + final FilteredRecordMergeType filteredRecordMergeType, + final GenotypeMergeType genotypeMergeOptions, + final boolean annotateOrigin, + final boolean printMessages, + final String setKey, + final boolean filteredAreUncalled, + final boolean mergeInfoWithMaxAC ) { + int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size(); + return simpleMerge(unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC); + } + + /** + * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. + * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with + * the sample name. + * simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use + * SampleUtils.verifyUniqueSamplesNames to check that before using sempleMerge. + * + * @param unsortedVCs collection of unsorted VCs + * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs + * @param filteredRecordMergeType merge type for filtered records + * @param genotypeMergeOptions merge option for genotypes + * @param annotateOrigin should we annotate the set it came from? + * @param printMessages should we print messages? + * @param setKey the key name of the set + * @param filteredAreUncalled are filtered records uncalled? + * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? + * @return new VariantContext representing the merge of unsortedVCs + */ + public static VariantContext simpleMerge(final Collection unsortedVCs, + final List priorityListOfVCs, + final int originalNumOfVCs, + final FilteredRecordMergeType filteredRecordMergeType, + final GenotypeMergeType genotypeMergeOptions, + final boolean annotateOrigin, + final boolean printMessages, + final String setKey, + final boolean filteredAreUncalled, + final boolean mergeInfoWithMaxAC ) { + + if ( unsortedVCs == null || unsortedVCs.size() == 0 ) + return null; + + if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size()) + throw new IllegalArgumentException("the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list"); + + if ( annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0) + throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts"); + + final List preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); + // Make sure all variant contexts are padded with reference base in case of indels if necessary + final List VCs = new ArrayList(); + + for (final VariantContext vc : preFilteredVCs) { + if ( ! filteredAreUncalled || vc.isNotFiltered() ) + VCs.add(vc); + } + if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled + return null; + + // establish the baseline info from the first VC + final VariantContext first = VCs.get(0); + final String name = first.getSource(); + final Allele refAllele = determineReferenceAllele(VCs); + + final Set alleles = new LinkedHashSet(); + final Set filters = new HashSet(); + final Map attributes = new LinkedHashMap(); + final Set inconsistentAttributes = new HashSet(); + final Set variantSources = new HashSet(); // contains the set of sources we found in our set of VCs that are variant + final Set rsIDs = new LinkedHashSet(1); // most of the time there's one id + + VariantContext longestVC = first; + int depth = 0; + int maxAC = -1; + final Map attributesWithMaxAC = new LinkedHashMap(); + double log10PError = CommonInfo.NO_LOG10_PERROR; + VariantContext vcWithMaxAC = null; + GenotypesContext genotypes = GenotypesContext.create(); + + // counting the number of filtered and variant VCs + int nFiltered = 0; + + boolean remapped = false; + + // cycle through and add info from the other VCs, making sure the loc/reference matches + + for ( final VariantContext vc : VCs ) { + if ( longestVC.getStart() != vc.getStart() ) + throw new IllegalStateException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); + + if ( VariantContextUtils.getSize(vc) > VariantContextUtils.getSize(longestVC) ) + longestVC = vc; // get the longest location + + nFiltered += vc.isFiltered() ? 1 : 0; + if ( vc.isVariant() ) variantSources.add(vc.getSource()); + + AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles); + remapped = remapped || alleleMapping.needsRemapping(); + + alleles.addAll(alleleMapping.values()); + + mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY); + + // We always take the QUAL of the first VC with a non-MISSING qual for the combined value + if ( log10PError == CommonInfo.NO_LOG10_PERROR ) + log10PError = vc.getLog10PError(); + + filters.addAll(vc.getFilters()); + + // + // add attributes + // + // special case DP (add it up) and ID (just preserve it) + // + if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) + depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0); + if ( vc.hasID() ) rsIDs.add(vc.getID()); + if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) { + String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null); + // lets see if the string contains a , separator + if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) { + List alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)); + for (String alleleCount : alleleCountArray) { + final int ac = Integer.valueOf(alleleCount.trim()); + if (ac > maxAC) { + maxAC = ac; + vcWithMaxAC = vc; + } + } + } else { + final int ac = Integer.valueOf(rawAlleleCounts); + if (ac > maxAC) { + maxAC = ac; + vcWithMaxAC = vc; + } + } + } + + for (final Map.Entry p : vc.getAttributes().entrySet()) { + String key = p.getKey(); + // if we don't like the key already, don't go anywhere + if ( ! inconsistentAttributes.contains(key) ) { + final boolean alreadyFound = attributes.containsKey(key); + final Object boundValue = attributes.get(key); + final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4); + + if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) { + // we found the value but we're inconsistent, put it in the exclude list + //System.out.printf("Inconsistent INFO values: %s => %s and %s%n", key, boundValue, p.getValue()); + inconsistentAttributes.add(key); + attributes.remove(key); + } else if ( ! alreadyFound || boundIsMissingValue ) { // no value + //if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(), p.getValue()); + attributes.put(key, p.getValue()); + } + } + } + } + + // if we have more alternate alleles in the merged VC than in one or more of the + // original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well as allele-dependent attributes like AC,AF, and AD + for ( final VariantContext vc : VCs ) { + if (vc.getAlleles().size() == 1) + continue; + if ( hasPLIncompatibleAlleles(alleles, vc.getAlleles())) { + if ( ! genotypes.isEmpty() ) { + logger.debug(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s", + vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.getAlleles())); + } + genotypes = stripPLsAndAD(genotypes); + // this will remove stale AC,AF attributed from vc + VariantContextUtils.calculateChromosomeCounts(vc, attributes, true); + break; + } + } + + // take the VC with the maxAC and pull the attributes into a modifiable map + if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) { + attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes()); + } + + // if at least one record was unfiltered and we want a union, clear all of the filters + if ( (filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL ) + filters.clear(); + + + if ( annotateOrigin ) { // we care about where the call came from + String setValue; + if ( nFiltered == 0 && variantSources.size() == originalNumOfVCs ) // nothing was unfiltered + setValue = MERGE_INTERSECTION; + else if ( nFiltered == VCs.size() ) // everything was filtered out + setValue = MERGE_FILTER_IN_ALL; + else if ( variantSources.isEmpty() ) // everyone was reference + setValue = MERGE_REF_IN_ALL; + else { + final LinkedHashSet s = new LinkedHashSet(); + for ( final VariantContext vc : VCs ) + if ( vc.isVariant() ) + s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() ); + setValue = Utils.join("-", s); + } + + if ( setKey != null ) { + attributes.put(setKey, setValue); + if( mergeInfoWithMaxAC && vcWithMaxAC != null ) { + attributesWithMaxAC.put(setKey, setValue); + } + } + } + + if ( depth > 0 ) + attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth)); + + final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs); + + final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID); + builder.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd()); + builder.alleles(alleles); + builder.genotypes(genotypes); + builder.log10PError(log10PError); + builder.filters(filters.isEmpty() ? filters : new TreeSet(filters)); + builder.attributes(new TreeMap(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes)); + + // Trim the padded bases of all alleles if necessary + final VariantContext merged = builder.make(); + if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged); + return merged; + } + + private static final boolean hasPLIncompatibleAlleles(final Collection alleleSet1, final Collection alleleSet2) { + final Iterator it1 = alleleSet1.iterator(); + final Iterator it2 = alleleSet2.iterator(); + + while ( it1.hasNext() && it2.hasNext() ) { + final Allele a1 = it1.next(); + final Allele a2 = it2.next(); + if ( ! a1.equals(a2) ) + return true; + } + + // by this point, at least one of the iterators is empty. All of the elements + // we've compared are equal up until this point. But it's possible that the + // sets aren't the same size, which is indicated by the test below. If they + // are of the same size, though, the sets are compatible + return it1.hasNext() || it2.hasNext(); + } + + public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) { + GenotypesContext newGs = GenotypesContext.create(genotypes.size()); + + for ( final Genotype g : genotypes ) { + newGs.add(removePLsAndAD(g)); + } + + return newGs; + } + + static private Allele determineReferenceAllele(List VCs) { + Allele ref = null; + + for ( VariantContext vc : VCs ) { + Allele myRef = vc.getReference(); + if ( ref == null || ref.length() < myRef.length() ) + ref = myRef; + else if ( ref.length() == myRef.length() && ! ref.equals(myRef) ) + throw new TribbleException(String.format("The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s", vc.getChr(), vc.getStart(), ref, myRef)); + } + + return ref; + } + + static private AlleleMapper resolveIncompatibleAlleles(Allele refAllele, VariantContext vc, Set allAlleles) { + if ( refAllele.equals(vc.getReference()) ) + return new AlleleMapper(vc); + else { + // we really need to do some work. The refAllele is the longest reference allele seen at this + // start site. So imagine it is: + // + // refAllele: ACGTGA + // myRef: ACGT + // myAlt: A + // + // We need to remap all of the alleles in vc to include the extra GA so that + // myRef => refAllele and myAlt => AGA + // + + Allele myRef = vc.getReference(); + if ( refAllele.length() <= myRef.length() ) throw new IllegalStateException("BUG: myRef="+myRef+" is longer than refAllele="+refAllele); + byte[] extraBases = Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length()); + +// System.out.printf("Remapping allele at %s%n", vc); +// System.out.printf("ref %s%n", refAllele); +// System.out.printf("myref %s%n", myRef ); +// System.out.printf("extrabases %s%n", new String(extraBases)); + + Map map = new HashMap(); + for ( Allele a : vc.getAlleles() ) { + if ( a.isReference() ) + map.put(a, refAllele); + else { + Allele extended = Allele.extend(a, extraBases); + for ( Allele b : allAlleles ) + if ( extended.equals(b) ) + extended = b; +// System.out.printf(" Extending %s => %s%n", a, extended); + map.put(a, extended); + } + } + + // debugging +// System.out.printf("mapping %s%n", map); + + return new AlleleMapper(map); + } + } + + public static List sortVariantContextsByPriority(Collection unsortedVCs, List priorityListOfVCs, GenotypeMergeType mergeOption ) { + if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null ) + throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list"); + + if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED ) + return new ArrayList(unsortedVCs); + else { + ArrayList sorted = new ArrayList(unsortedVCs); + Collections.sort(sorted, new CompareByPriority(priorityListOfVCs)); + return sorted; + } + } + + private static void mergeGenotypes(GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) { + //TODO: should we add a check for cases when the genotypeMergeOption is REQUIRE_UNIQUE + for ( Genotype g : oneVC.getGenotypes() ) { + String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples); + if ( ! mergedGenotypes.containsSample(name) ) { + // only add if the name is new + Genotype newG = g; + + if ( uniqifySamples || alleleMapping.needsRemapping() ) { + final List alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles(); + newG = new GenotypeBuilder(g).name(name).alleles(alleles).make(); + } + + mergedGenotypes.add(newG); + } + } + } + + public static String mergedSampleName(String trackName, String sampleName, boolean uniqify ) { + return uniqify ? sampleName + "." + trackName : sampleName; + } + + public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { + + // see whether we need to trim common reference base from all alleles + final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false); + if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) + return inputVC; + + final List alleles = new ArrayList(); + final GenotypesContext genotypes = GenotypesContext.create(); + final Map originalToTrimmedAlleleMap = new HashMap(); + + for (final Allele a : inputVC.getAlleles()) { + if (a.isSymbolic()) { + alleles.add(a); + originalToTrimmedAlleleMap.put(a, a); + } else { + // get bases for current allele and create a new one with trimmed bases + final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); + final Allele trimmedAllele = Allele.create(newBases, a.isReference()); + alleles.add(trimmedAllele); + originalToTrimmedAlleleMap.put(a, trimmedAllele); + } + } + + // now we can recreate new genotypes with trimmed alleles + for ( final Genotype genotype : inputVC.getGenotypes() ) { + final List originalAlleles = genotype.getAlleles(); + final List trimmedAlleles = new ArrayList(); + for ( final Allele a : originalAlleles ) { + if ( a.isCalled() ) + trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); + else + trimmedAlleles.add(Allele.NO_CALL); + } + genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); + } + + return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make(); + } + + public static int computeReverseClipping(final List unclippedAlleles, + final byte[] ref, + final int forwardClipping, + final boolean allowFullClip) { + int clipping = 0; + boolean stillClipping = true; + + while ( stillClipping ) { + for ( final Allele a : unclippedAlleles ) { + if ( a.isSymbolic() ) + continue; + + // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong + // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). + if ( a.length() - clipping == 0 ) + return clipping - (allowFullClip ? 0 : 1); + + if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { + stillClipping = false; + } + else if ( ref.length == clipping ) { + if ( allowFullClip ) + stillClipping = false; + else + return -1; + } + else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { + stillClipping = false; + } + } + if ( stillClipping ) + clipping++; + } + + return clipping; + } + + public static double computeHardyWeinbergPvalue(VariantContext vc) { + if ( vc.getCalledChrCount() == 0 ) + return 0.0; + return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount()); + } + + public static boolean requiresPaddingBase(final List alleles) { + + // see whether one of the alleles would be null if trimmed through + + for ( final String allele : alleles ) { + if ( allele.isEmpty() ) + return true; + } + + int clipping = 0; + Character currentBase = null; + + while ( true ) { + for ( final String allele : alleles ) { + if ( allele.length() - clipping == 0 ) + return true; + + char myBase = allele.charAt(clipping); + if ( currentBase == null ) + currentBase = myBase; + else if ( currentBase != myBase ) + return false; + } + + clipping++; + currentBase = null; + } + } + + private static class AlleleMapper { + private VariantContext vc = null; + private Map map = null; + public AlleleMapper(VariantContext vc) { this.vc = vc; } + public AlleleMapper(Map map) { this.map = map; } + public boolean needsRemapping() { return this.map != null; } + public Collection values() { return map != null ? map.values() : vc.getAlleles(); } + public Allele remap(Allele a) { return map != null && map.containsKey(a) ? map.get(a) : a; } + + public List remap(List as) { + List newAs = new ArrayList(); + for ( Allele a : as ) { + //System.out.printf(" Remapping %s => %s%n", a, remap(a)); + newAs.add(remap(a)); + } + return newAs; + } + } + + private static class CompareByPriority implements Comparator, Serializable { + List priorityListOfVCs; + public CompareByPriority(List priorityListOfVCs) { + this.priorityListOfVCs = priorityListOfVCs; + } + + private int getIndex(VariantContext vc) { + int i = priorityListOfVCs.indexOf(vc.getSource()); + if ( i == -1 ) throw new IllegalArgumentException("Priority list " + priorityListOfVCs + " doesn't contain variant context " + vc.getSource()); + return i; + } + + public int compare(VariantContext vc1, VariantContext vc2) { + return Integer.valueOf(getIndex(vc1)).compareTo(getIndex(vc2)); + } + } } diff --git a/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java b/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java index bbd02e0a6..76b49edb9 100644 --- a/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java @@ -26,8 +26,6 @@ package org.broadinstitute.variant.utils; import net.sf.samtools.util.StringUtil; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.Arrays; import java.util.Random; @@ -176,7 +174,7 @@ public class BaseUtils { if ( baseIndex == Base.N.ordinal() ) { bases[i] = 'N'; } else if ( errorOnBadReferenceBase && baseIndex == -1 ) { - throw new UserException.BadInput("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'"); + throw new IllegalStateException("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'"); } } return bases; @@ -517,7 +515,7 @@ public class BaseUtils { case 'N': return 'N'; default: - throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base."); + throw new IllegalArgumentException("base must be A, C, G or T. " + (char) base + " is not a valid base."); } } } diff --git a/public/java/src/org/broadinstitute/variant/utils/GeneralUtils.java b/public/java/src/org/broadinstitute/variant/utils/GeneralUtils.java index 79014a0eb..2dbc865b5 100644 --- a/public/java/src/org/broadinstitute/variant/utils/GeneralUtils.java +++ b/public/java/src/org/broadinstitute/variant/utils/GeneralUtils.java @@ -141,13 +141,6 @@ public class GeneralUtils { return normalized; } - public static double sum(double[] values) { - double s = 0.0; - for (double v : values) - s += v; - return s; - } - public static double arrayMax(final double[] array) { return array[maxElementIndex(array, array.length)]; } diff --git a/public/java/src/org/broadinstitute/variant/variantcontext/CommonInfo.java b/public/java/src/org/broadinstitute/variant/variantcontext/CommonInfo.java index fd3227dbf..16fa52ee0 100644 --- a/public/java/src/org/broadinstitute/variant/variantcontext/CommonInfo.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/CommonInfo.java @@ -36,7 +36,7 @@ import java.util.*; * * @author depristo */ -final class CommonInfo { +public final class CommonInfo { public static final double NO_LOG10_PERROR = 1.0; private static Set NO_FILTERS = Collections.emptySet(); diff --git a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java index a6378951e..fa2b5c9e5 100644 --- a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContextUtils.java @@ -29,27 +29,16 @@ import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.commons.jexl2.Expression; import org.apache.commons.jexl2.JexlEngine; -import org.apache.commons.lang.ArrayUtils; import org.broad.tribble.TribbleException; -import org.broad.tribble.util.popgen.HardyWeinbergCalculation; import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.GeneralUtils; -import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.variant.vcf.*; -import java.io.Serializable; import java.util.*; public class VariantContextUtils { - public final static String MERGE_INTERSECTION = "Intersection"; - public final static String MERGE_FILTER_IN_ALL = "FilteredInAll"; - public final static String MERGE_REF_IN_ALL = "ReferenceInAll"; - public final static String MERGE_FILTER_PREFIX = "filterIn"; - public static final int DEFAULT_PLOIDY = 2; - public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. private static Set MISSING_KEYS_WARNED_ABOUT = new HashSet(); - private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); final public static JexlEngine engine = new JexlEngine(); private final static boolean ASSUME_MISSING_FIELDS_ARE_STRINGS = false; @@ -158,10 +147,6 @@ public class VariantContextUtils { builder.attributes(calculateChromosomeCounts(vc, new HashMap(vc.getAttributes()), removeStaleValues, founderIds)); } - public static Genotype removePLsAndAD(final Genotype g) { - return ( g.hasLikelihoods() || g.hasAD() ) ? new GenotypeBuilder(g).noPL().noAD().make() : g; - } - public final static VCFCompoundHeaderLine getMetaDataForField(final VCFHeader header, final String field) { VCFCompoundHeaderLine metaData = header.getFormatHeaderLine(field); if ( metaData == null ) metaData = header.getInfoHeaderLine(field); @@ -180,443 +165,6 @@ public class VariantContextUtils { return metaData; } - /** - * Returns true iff VC is an non-complex indel where every allele represents an expansion or - * contraction of a series of identical bases in the reference. - * - * For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT - * - * If VC = -/CT, then this function returns true because the CT insertion matches exactly the - * upcoming reference. - * If VC = -/CTA then this function returns false because the CTA isn't a perfect match - * - * Now consider deletions: - * - * If VC = CT/- then again the same logic applies and this returns true - * The case of CTA/- makes no sense because it doesn't actually match the reference bases. - * - * The logic of this function is pretty simple. Take all of the non-null alleles in VC. For - * each insertion allele of n bases, check if that allele matches the next n reference bases. - * For each deletion allele of n bases, check if this matches the reference bases at n - 2 n, - * as it must necessarily match the first n bases. If this test returns true for all - * alleles you are a tandem repeat, otherwise you are not. - * - * @param vc - * @param refBasesStartingAtVCWithPad not this is assumed to include the PADDED reference - * @return - */ - @Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"}) - public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) { - final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1); - if ( ! vc.isIndel() ) // only indels are tandem repeats - return false; - - final Allele ref = vc.getReference(); - - for ( final Allele allele : vc.getAlternateAlleles() ) { - if ( ! isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad) ) - return false; - } - - // we've passed all of the tests, so we are a repeat - return true; - } - - /** - * - * @param vc - * @param refBasesStartingAtVCWithPad - * @return - */ - @Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"}) - public static Pair,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) { - final boolean VERBOSE = false; - final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1); - if ( ! vc.isIndel() ) // only indels are tandem repeats - return null; - - final Allele refAllele = vc.getReference(); - final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length()); - - byte[] repeatUnit = null; - final ArrayList lengths = new ArrayList(); - - for ( final Allele allele : vc.getAlternateAlleles() ) { - Pair result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes()); - - final int[] repetitionCount = result.first; - // repetition count = 0 means allele is not a tandem expansion of context - if (repetitionCount[0] == 0 || repetitionCount[1] == 0) - return null; - - if (lengths.size() == 0) { - lengths.add(repetitionCount[0]); // add ref allele length only once - } - lengths.add(repetitionCount[1]); // add this alt allele's length - - repeatUnit = result.second; - if (VERBOSE) { - System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad); - System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0])); - System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1])); - System.out.println("RU:"+new String(repeatUnit)); - } - } - - return new Pair, byte[]>(lengths,repeatUnit); - } - - public static Pair getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) { - /* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units. - Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)2. - */ - - byte[] longB; - // find first repeat unit based on either ref or alt, whichever is longer - if (altBases.length > refBases.length) - longB = altBases; - else - longB = refBases; - - // see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units - // for example, -*,CACA needs to first be decomposed into (CA)2 - final int repeatUnitLength = findRepeatedSubstring(longB); - final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength); - - final int[] repetitionCount = new int[2]; - // look for repetitions forward on the ref bases (i.e. starting at beginning of ref bases) - int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases, true); - repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext), true)-repetitionsInRef; - repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext), true)-repetitionsInRef; - - return new Pair(repetitionCount, repeatUnit); - - } - - /** - * Find out if a string can be represented as a tandem number of substrings. - * For example ACTACT is a 2-tandem of ACT, - * but ACTACA is not. - * - * @param bases String to be tested - * @return Length of repeat unit, if string can be represented as tandem of substring (if it can't - * be represented as one, it will be just the length of the input string) - */ - public static int findRepeatedSubstring(byte[] bases) { - - int repLength; - for (repLength=1; repLength <=bases.length; repLength++) { - final byte[] candidateRepeatUnit = Arrays.copyOf(bases,repLength); - boolean allBasesMatch = true; - for (int start = repLength; start < bases.length; start += repLength ) { - // check that remaining of string is exactly equal to repeat unit - final byte[] basePiece = Arrays.copyOfRange(bases,start,start+candidateRepeatUnit.length); - if (!Arrays.equals(candidateRepeatUnit, basePiece)) { - allBasesMatch = false; - break; - } - } - if (allBasesMatch) - return repLength; - } - - return repLength; - } - - /** - * Helper routine that finds number of repetitions a string consists of. - * For example, for string ATAT and repeat unit AT, number of repetitions = 2 - * @param repeatUnit Substring - * @param testString String to test - * @oaram lookForward Look for repetitions forward (at beginning of string) or backward (at end of string) - * @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's - */ - public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString, boolean lookForward) { - int numRepeats = 0; - if (lookForward) { - // look forward on the test string - for (int start = 0; start < testString.length; start += repeatUnit.length) { - int end = start + repeatUnit.length; - byte[] unit = Arrays.copyOfRange(testString,start, end); - if(Arrays.equals(unit,repeatUnit)) - numRepeats++; - else - break; - } - return numRepeats; - } - - // look backward. For example, if repeatUnit = AT and testString = GATAT, number of repeat units is still 2 - // look forward on the test string - for (int start = testString.length - repeatUnit.length; start >= 0; start -= repeatUnit.length) { - int end = start + repeatUnit.length; - byte[] unit = Arrays.copyOfRange(testString,start, end); - if(Arrays.equals(unit,repeatUnit)) - numRepeats++; - else - break; - } - return numRepeats; - } - - /** - * Helper function for isTandemRepeat that checks that allele matches somewhere on the reference - * @param ref - * @param alt - * @param refBasesStartingAtVCWithoutPad - * @return - */ - protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) { - if ( ! Allele.oneIsPrefixOfOther(ref, alt) ) - return false; // we require one allele be a prefix of another - - if ( ref.length() > alt.length() ) { // we are a deletion - return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2); - } else { // we are an insertion - return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1); - } - } - - protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) { - final String potentialRepeat = l.substring(s.length()); // skip s bases - - for ( int i = 0; i < minNumberOfMatches; i++) { - final int start = i * potentialRepeat.length(); - final int end = (i+1) * potentialRepeat.length(); - if ( ref.length() < end ) - return false; // we ran out of bases to test - final String refSub = ref.substring(start, end); - if ( ! refSub.equals(potentialRepeat) ) - return false; // repeat didn't match, fail - } - - return true; // we passed all tests, we matched - } - - /** - * Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs - * - * @param vc variant context with genotype likelihoods - * @return genotypes context - */ - public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) { - return subsetDiploidAlleles(vc, vc.getAlleles(), true); - } - - /** - * Split variant context into its biallelic components if there are more than 2 alleles - * - * For VC has A/B/C alleles, returns A/B and A/C contexts. - * Genotypes are all no-calls now (it's not possible to fix them easily) - * Alleles are right trimmed to satisfy VCF conventions - * - * If vc is biallelic or non-variant it is just returned - * - * Chromosome counts are updated (but they are by definition 0) - * - * @param vc a potentially multi-allelic variant context - * @return a list of bi-allelic (or monomorphic) variant context - */ - public static List splitVariantContextToBiallelics(final VariantContext vc) { - if ( ! vc.isVariant() || vc.isBiallelic() ) - // non variant or biallelics already satisfy the contract - return Collections.singletonList(vc); - else { - final List biallelics = new LinkedList(); - - for ( final Allele alt : vc.getAlternateAlleles() ) { - VariantContextBuilder builder = new VariantContextBuilder(vc); - final List alleles = Arrays.asList(vc.getReference(), alt); - builder.alleles(alleles); - builder.genotypes(subsetDiploidAlleles(vc, alleles, false)); - calculateChromosomeCounts(builder, true); - biallelics.add(reverseTrimAlleles(builder.make())); - } - - return biallelics; - } - } - - /** - * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately) - * - * @param vc variant context with genotype likelihoods - * @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC *** - * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs - * @return genotypes - */ - public static GenotypesContext subsetDiploidAlleles(final VariantContext vc, - final List allelesToUse, - final boolean assignGenotypes) { - - // the genotypes with PLs - final GenotypesContext oldGTs = vc.getGenotypes(); - - // samples - final List sampleIndices = oldGTs.getSampleNamesOrderedByName(); - - // the new genotypes to create - final GenotypesContext newGTs = GenotypesContext.create(); - - // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward - final int numOriginalAltAlleles = vc.getAlternateAlleles().size(); - final int numNewAltAlleles = allelesToUse.size() - 1; - - // which PLs should be carried forward? - ArrayList likelihoodIndexesToUse = null; - - // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, - // then we can keep the PLs as is; otherwise, we determine which ones to keep - if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { - likelihoodIndexesToUse = new ArrayList(30); - - final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles]; - for ( int i = 0; i < numOriginalAltAlleles; i++ ) { - if ( allelesToUse.contains(vc.getAlternateAllele(i)) ) - altAlleleIndexToUse[i] = true; - } - - // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2 - final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY); - for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) { - final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); - // consider this entry only if both of the alleles are good - if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) ) - likelihoodIndexesToUse.add(PLindex); - } - } - - // create the new genotypes - for ( int k = 0; k < oldGTs.size(); k++ ) { - final Genotype g = oldGTs.get(sampleIndices.get(k)); - if ( !g.hasLikelihoods() ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); - continue; - } - - // create the new likelihoods array from the alleles we are allowed to use - final double[] originalLikelihoods = g.getLikelihoods().getAsVector(); - double[] newLikelihoods; - if ( likelihoodIndexesToUse == null ) { - newLikelihoods = originalLikelihoods; - } else { - newLikelihoods = new double[likelihoodIndexesToUse.size()]; - int newIndex = 0; - for ( int oldIndex : likelihoodIndexesToUse ) - newLikelihoods[newIndex++] = originalLikelihoods[oldIndex]; - - // might need to re-normalize - newLikelihoods = GeneralUtils.normalizeFromLog10(newLikelihoods, false, true); - } - - // if there is no mass on the (new) likelihoods, then just no-call the sample - if ( GeneralUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { - newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); - } - else { - final GenotypeBuilder gb = new GenotypeBuilder(g); - - if ( numNewAltAlleles == 0 ) - gb.noPL(); - else - gb.PL(newLikelihoods); - - // if we weren't asked to assign a genotype, then just no-call the sample - if ( !assignGenotypes || GeneralUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) { - gb.alleles(NO_CALL_ALLELES); - } - else { - // find the genotype with maximum likelihoods - int PLindex = numNewAltAlleles == 0 ? 0 : GeneralUtils.maxElementIndex(newLikelihoods); - GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); - - gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2))); - if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); - } - newGTs.add(gb.make()); - } - } - - return newGTs; - } - - public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) { - - // see whether we need to trim common reference base from all alleles - final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false); - if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 ) - return inputVC; - - final List alleles = new ArrayList(); - final GenotypesContext genotypes = GenotypesContext.create(); - final Map originalToTrimmedAlleleMap = new HashMap(); - - for (final Allele a : inputVC.getAlleles()) { - if (a.isSymbolic()) { - alleles.add(a); - originalToTrimmedAlleleMap.put(a, a); - } else { - // get bases for current allele and create a new one with trimmed bases - final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent); - final Allele trimmedAllele = Allele.create(newBases, a.isReference()); - alleles.add(trimmedAllele); - originalToTrimmedAlleleMap.put(a, trimmedAllele); - } - } - - // now we can recreate new genotypes with trimmed alleles - for ( final Genotype genotype : inputVC.getGenotypes() ) { - final List originalAlleles = genotype.getAlleles(); - final List trimmedAlleles = new ArrayList(); - for ( final Allele a : originalAlleles ) { - if ( a.isCalled() ) - trimmedAlleles.add(originalToTrimmedAlleleMap.get(a)); - else - trimmedAlleles.add(Allele.NO_CALL); - } - genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make()); - } - - return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make(); - } - - public static int computeReverseClipping(final List unclippedAlleles, - final byte[] ref, - final int forwardClipping, - final boolean allowFullClip) { - int clipping = 0; - boolean stillClipping = true; - - while ( stillClipping ) { - for ( final Allele a : unclippedAlleles ) { - if ( a.isSymbolic() ) - continue; - - // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong - // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine). - if ( a.length() - clipping == 0 ) - return clipping - (allowFullClip ? 0 : 1); - - if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) { - stillClipping = false; - } - else if ( ref.length == clipping ) { - if ( allowFullClip ) - stillClipping = false; - else - return -1; - } - else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) { - stillClipping = false; - } - } - if ( stillClipping ) - clipping++; - } - - return clipping; - } - /** * A simple but common wrapper for matching VariantContext objects using JEXL expressions */ @@ -744,12 +292,6 @@ public class VariantContextUtils { return new JEXLMap(exps,vc,g); } - public static double computeHardyWeinbergPvalue(VariantContext vc) { - if ( vc.getCalledChrCount() == 0 ) - return 0.0; - return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount()); - } - /** * Returns a newly allocated VC that is the same as VC, but without genotypes * @param vc variant context @@ -814,317 +356,6 @@ public class VariantContextUtils { return builder.genotypes(genotypes).attributes(attributes); } - public enum GenotypeMergeType { - /** - * Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD. - */ - UNIQUIFY, - /** - * Take genotypes in priority order (see the priority argument). - */ - PRIORITIZE, - /** - * Take the genotypes in any order. - */ - UNSORTED, - /** - * Require that all samples/genotypes be unique between all inputs. - */ - REQUIRE_UNIQUE - } - - public enum FilteredRecordMergeType { - /** - * Union - leaves the record if any record is unfiltered. - */ - KEEP_IF_ANY_UNFILTERED, - /** - * Requires all records present at site to be unfiltered. VCF files that don't contain the record don't influence this. - */ - KEEP_IF_ALL_UNFILTERED, - /** - * If any record is present at this site (regardless of possibly being filtered), then all such records are kept and the filters are reset. - */ - KEEP_UNCONDITIONAL - } - - public enum MultipleAllelesMergeType { - /** - * Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record. - */ - BY_TYPE, - /** - * Merge all allele types at the same start position into the same VCF record. - */ - MIX_TYPES - } - - /** - * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. - * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with - * the sample name - * - * @param unsortedVCs collection of unsorted VCs - * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs - * @param filteredRecordMergeType merge type for filtered records - * @param genotypeMergeOptions merge option for genotypes - * @param annotateOrigin should we annotate the set it came from? - * @param printMessages should we print messages? - * @param setKey the key name of the set - * @param filteredAreUncalled are filtered records uncalled? - * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? - * @return new VariantContext representing the merge of unsortedVCs - */ - public static VariantContext simpleMerge(final Collection unsortedVCs, - final List priorityListOfVCs, - final FilteredRecordMergeType filteredRecordMergeType, - final GenotypeMergeType genotypeMergeOptions, - final boolean annotateOrigin, - final boolean printMessages, - final String setKey, - final boolean filteredAreUncalled, - final boolean mergeInfoWithMaxAC ) { - int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size(); - return simpleMerge(unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC); - } - - /** - * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided. - * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with - * the sample name. - * simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use - * SampleUtils.verifyUniqueSamplesNames to check that before using sempleMerge. - * - * @param unsortedVCs collection of unsorted VCs - * @param priorityListOfVCs priority list detailing the order in which we should grab the VCs - * @param filteredRecordMergeType merge type for filtered records - * @param genotypeMergeOptions merge option for genotypes - * @param annotateOrigin should we annotate the set it came from? - * @param printMessages should we print messages? - * @param setKey the key name of the set - * @param filteredAreUncalled are filtered records uncalled? - * @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count? - * @return new VariantContext representing the merge of unsortedVCs - */ - public static VariantContext simpleMerge(final Collection unsortedVCs, - final List priorityListOfVCs, - final int originalNumOfVCs, - final FilteredRecordMergeType filteredRecordMergeType, - final GenotypeMergeType genotypeMergeOptions, - final boolean annotateOrigin, - final boolean printMessages, - final String setKey, - final boolean filteredAreUncalled, - final boolean mergeInfoWithMaxAC ) { - - if ( unsortedVCs == null || unsortedVCs.size() == 0 ) - return null; - - if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size()) - throw new IllegalArgumentException("the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list"); - - if ( annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0) - throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts"); - - final List preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); - // Make sure all variant contexts are padded with reference base in case of indels if necessary - final List VCs = new ArrayList(); - - for (final VariantContext vc : preFilteredVCs) { - if ( ! filteredAreUncalled || vc.isNotFiltered() ) - VCs.add(vc); - } - if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled - return null; - - // establish the baseline info from the first VC - final VariantContext first = VCs.get(0); - final String name = first.getSource(); - final Allele refAllele = determineReferenceAllele(VCs); - - final Set alleles = new LinkedHashSet(); - final Set filters = new HashSet(); - final Map attributes = new LinkedHashMap(); - final Set inconsistentAttributes = new HashSet(); - final Set variantSources = new HashSet(); // contains the set of sources we found in our set of VCs that are variant - final Set rsIDs = new LinkedHashSet(1); // most of the time there's one id - - VariantContext longestVC = first; - int depth = 0; - int maxAC = -1; - final Map attributesWithMaxAC = new LinkedHashMap(); - double log10PError = CommonInfo.NO_LOG10_PERROR; - VariantContext vcWithMaxAC = null; - GenotypesContext genotypes = GenotypesContext.create(); - - // counting the number of filtered and variant VCs - int nFiltered = 0; - - boolean remapped = false; - - // cycle through and add info from the other VCs, making sure the loc/reference matches - - for ( final VariantContext vc : VCs ) { - if ( longestVC.getStart() != vc.getStart() ) - throw new IllegalStateException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString()); - - if ( getSize(vc) > getSize(longestVC) ) - longestVC = vc; // get the longest location - - nFiltered += vc.isFiltered() ? 1 : 0; - if ( vc.isVariant() ) variantSources.add(vc.getSource()); - - AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles); - remapped = remapped || alleleMapping.needsRemapping(); - - alleles.addAll(alleleMapping.values()); - - mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY); - - // We always take the QUAL of the first VC with a non-MISSING qual for the combined value - if ( log10PError == CommonInfo.NO_LOG10_PERROR ) - log10PError = vc.getLog10PError(); - - filters.addAll(vc.getFilters()); - - // - // add attributes - // - // special case DP (add it up) and ID (just preserve it) - // - if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) - depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0); - if ( vc.hasID() ) rsIDs.add(vc.getID()); - if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) { - String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null); - // lets see if the string contains a , separator - if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) { - List alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)); - for (String alleleCount : alleleCountArray) { - final int ac = Integer.valueOf(alleleCount.trim()); - if (ac > maxAC) { - maxAC = ac; - vcWithMaxAC = vc; - } - } - } else { - final int ac = Integer.valueOf(rawAlleleCounts); - if (ac > maxAC) { - maxAC = ac; - vcWithMaxAC = vc; - } - } - } - - for (final Map.Entry p : vc.getAttributes().entrySet()) { - String key = p.getKey(); - // if we don't like the key already, don't go anywhere - if ( ! inconsistentAttributes.contains(key) ) { - final boolean alreadyFound = attributes.containsKey(key); - final Object boundValue = attributes.get(key); - final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4); - - if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) { - // we found the value but we're inconsistent, put it in the exclude list - //System.out.printf("Inconsistent INFO values: %s => %s and %s%n", key, boundValue, p.getValue()); - inconsistentAttributes.add(key); - attributes.remove(key); - } else if ( ! alreadyFound || boundIsMissingValue ) { // no value - //if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(), p.getValue()); - attributes.put(key, p.getValue()); - } - } - } - } - - // if we have more alternate alleles in the merged VC than in one or more of the - // original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well as allele-dependent attributes like AC,AF, and AD - for ( final VariantContext vc : VCs ) { - if (vc.alleles.size() == 1) - continue; - if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) { - if ( GeneralUtils.DEBUG_MODE_ENABLED && ! genotypes.isEmpty() ) { - System.err.println(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s", - vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.alleles)); - } - genotypes = stripPLsAndAD(genotypes); - // this will remove stale AC,AF attributed from vc - calculateChromosomeCounts(vc, attributes, true); - break; - } - } - - // take the VC with the maxAC and pull the attributes into a modifiable map - if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) { - attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes()); - } - - // if at least one record was unfiltered and we want a union, clear all of the filters - if ( (filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL ) - filters.clear(); - - - if ( annotateOrigin ) { // we care about where the call came from - String setValue; - if ( nFiltered == 0 && variantSources.size() == originalNumOfVCs ) // nothing was unfiltered - setValue = MERGE_INTERSECTION; - else if ( nFiltered == VCs.size() ) // everything was filtered out - setValue = MERGE_FILTER_IN_ALL; - else if ( variantSources.isEmpty() ) // everyone was reference - setValue = MERGE_REF_IN_ALL; - else { - final LinkedHashSet s = new LinkedHashSet(); - for ( final VariantContext vc : VCs ) - if ( vc.isVariant() ) - s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() ); - setValue = GeneralUtils.join("-", s); - } - - if ( setKey != null ) { - attributes.put(setKey, setValue); - if( mergeInfoWithMaxAC && vcWithMaxAC != null ) { - attributesWithMaxAC.put(setKey, setValue); - } - } - } - - if ( depth > 0 ) - attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth)); - - final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : GeneralUtils.join(",", rsIDs); - - final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID); - builder.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd()); - builder.alleles(alleles); - builder.genotypes(genotypes); - builder.log10PError(log10PError); - builder.filters(filters.isEmpty() ? filters : new TreeSet(filters)); - builder.attributes(new TreeMap(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes)); - - // Trim the padded bases of all alleles if necessary - final VariantContext merged = builder.make(); - if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged); - return merged; - } - - private static final boolean hasPLIncompatibleAlleles(final Collection alleleSet1, final Collection alleleSet2) { - final Iterator it1 = alleleSet1.iterator(); - final Iterator it2 = alleleSet2.iterator(); - - while ( it1.hasNext() && it2.hasNext() ) { - final Allele a1 = it1.next(); - final Allele a2 = it2.next(); - if ( ! a1.equals(a2) ) - return true; - } - - // by this point, at least one of the iterators is empty. All of the elements - // we've compared are equal up until this point. But it's possible that the - // sets aren't the same size, which is indicated by the test below. If they - // are of the same size, though, the sets are compatible - return it1.hasNext() || it2.hasNext(); - } - public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) { // if all alleles of vc1 are a contained in alleles of vc2, return true if (!vc1.getReference().equals(vc2.getReference())) @@ -1138,16 +369,6 @@ public class VariantContextUtils { return true; } - public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) { - GenotypesContext newGs = GenotypesContext.create(genotypes.size()); - - for ( final Genotype g : genotypes ) { - newGs.add(removePLsAndAD(g)); - } - - return newGs; - } - public static Map> separateVariantContextsByType(Collection VCs) { HashMap> mappedVCs = new HashMap>(); for ( VariantContext vc : VCs ) { @@ -1196,26 +417,7 @@ public class VariantContextUtils { return mappedVCs; } - private static class AlleleMapper { - private VariantContext vc = null; - private Map map = null; - public AlleleMapper(VariantContext vc) { this.vc = vc; } - public AlleleMapper(Map map) { this.map = map; } - public boolean needsRemapping() { return this.map != null; } - public Collection values() { return map != null ? map.values() : vc.getAlleles(); } - public Allele remap(Allele a) { return map != null && map.containsKey(a) ? map.get(a) : a; } - - public List remap(List as) { - List newAs = new ArrayList(); - for ( Allele a : as ) { - //System.out.printf(" Remapping %s => %s%n", a, remap(a)); - newAs.add(remap(a)); - } - return newAs; - } - } - -// TODO: remove that after testing + // TODO: remove that after testing // static private void verifyUniqueSampleNames(Collection unsortedVCs) { // Set names = new HashSet(); // for ( VariantContext vc : unsortedVCs ) { @@ -1230,117 +432,6 @@ public class VariantContextUtils { // } - static private Allele determineReferenceAllele(List VCs) { - Allele ref = null; - - for ( VariantContext vc : VCs ) { - Allele myRef = vc.getReference(); - if ( ref == null || ref.length() < myRef.length() ) - ref = myRef; - else if ( ref.length() == myRef.length() && ! ref.equals(myRef) ) - throw new TribbleException(String.format("The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s", vc.getChr(), vc.getStart(), ref, myRef)); - } - - return ref; - } - - static private AlleleMapper resolveIncompatibleAlleles(Allele refAllele, VariantContext vc, Set allAlleles) { - if ( refAllele.equals(vc.getReference()) ) - return new AlleleMapper(vc); - else { - // we really need to do some work. The refAllele is the longest reference allele seen at this - // start site. So imagine it is: - // - // refAllele: ACGTGA - // myRef: ACGT - // myAlt: A - // - // We need to remap all of the alleles in vc to include the extra GA so that - // myRef => refAllele and myAlt => AGA - // - - Allele myRef = vc.getReference(); - if ( refAllele.length() <= myRef.length() ) throw new IllegalStateException("BUG: myRef="+myRef+" is longer than refAllele="+refAllele); - byte[] extraBases = Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length()); - -// System.out.printf("Remapping allele at %s%n", vc); -// System.out.printf("ref %s%n", refAllele); -// System.out.printf("myref %s%n", myRef ); -// System.out.printf("extrabases %s%n", new String(extraBases)); - - Map map = new HashMap(); - for ( Allele a : vc.getAlleles() ) { - if ( a.isReference() ) - map.put(a, refAllele); - else { - Allele extended = Allele.extend(a, extraBases); - for ( Allele b : allAlleles ) - if ( extended.equals(b) ) - extended = b; -// System.out.printf(" Extending %s => %s%n", a, extended); - map.put(a, extended); - } - } - - // debugging -// System.out.printf("mapping %s%n", map); - - return new AlleleMapper(map); - } - } - - static class CompareByPriority implements Comparator, Serializable { - List priorityListOfVCs; - public CompareByPriority(List priorityListOfVCs) { - this.priorityListOfVCs = priorityListOfVCs; - } - - private int getIndex(VariantContext vc) { - int i = priorityListOfVCs.indexOf(vc.getSource()); - if ( i == -1 ) throw new IllegalArgumentException("Priority list " + priorityListOfVCs + " doesn't contain variant context " + vc.getSource()); - return i; - } - - public int compare(VariantContext vc1, VariantContext vc2) { - return Integer.valueOf(getIndex(vc1)).compareTo(getIndex(vc2)); - } - } - - public static List sortVariantContextsByPriority(Collection unsortedVCs, List priorityListOfVCs, GenotypeMergeType mergeOption ) { - if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null ) - throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list"); - - if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED ) - return new ArrayList(unsortedVCs); - else { - ArrayList sorted = new ArrayList(unsortedVCs); - Collections.sort(sorted, new CompareByPriority(priorityListOfVCs)); - return sorted; - } - } - - private static void mergeGenotypes(GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) { - //TODO: should we add a check for cases when the genotypeMergeOption is REQUIRE_UNIQUE - for ( Genotype g : oneVC.getGenotypes() ) { - String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples); - if ( ! mergedGenotypes.containsSample(name) ) { - // only add if the name is new - Genotype newG = g; - - if ( uniqifySamples || alleleMapping.needsRemapping() ) { - final List alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles(); - newG = new GenotypeBuilder(g).name(name).alleles(alleles).make(); - } - - mergedGenotypes.add(newG); - } - } - } - - public static String mergedSampleName(String trackName, String sampleName, boolean uniqify ) { - return uniqify ? sampleName + "." + trackName : sampleName; - } - /** * Returns a context identical to this with the REF and ALT alleles reverse complemented. * @@ -1460,33 +551,4 @@ public class VariantContextUtils { } } - public static boolean requiresPaddingBase(final List alleles) { - - // see whether one of the alleles would be null if trimmed through - - for ( final String allele : alleles ) { - if ( allele.isEmpty() ) - return true; - } - - int clipping = 0; - Character currentBase = null; - - while ( true ) { - for ( final String allele : alleles ) { - if ( allele.length() - clipping == 0 ) - return true; - - char myBase = allele.charAt(clipping); - if ( currentBase == null ) - currentBase = myBase; - else if ( currentBase != myBase ) - return false; - } - - clipping++; - currentBase = null; - } - } - } diff --git a/public/java/src/org/broadinstitute/variant/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/variant/vcf/VCFUtils.java index db2d47609..f61761652 100644 --- a/public/java/src/org/broadinstitute/variant/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/variant/vcf/VCFUtils.java @@ -28,15 +28,9 @@ package org.broadinstitute.variant.vcf; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; import org.apache.commons.io.FilenameUtils; -import org.broad.tribble.FeatureCodecHeader; -import org.broad.tribble.readers.PositionalBufferedStream; import org.broadinstitute.variant.utils.GeneralUtils; -import org.broadinstitute.variant.utils.Pair; -import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; import java.util.*; public class VCFUtils { @@ -106,21 +100,6 @@ public class VCFUtils { return new HashSet(map.values()); } - public static String rsIDOfFirstRealVariant(List VCs, VariantContext.Type type) { - if ( VCs == null ) - return null; - - String rsID = null; - for ( VariantContext vc : VCs ) { - if ( vc.getType() == type ) { - rsID = vc.getID(); - break; - } - } - - return rsID; - } - /** * Add / replace the contig header lines in the VCFHeader with the in the reference file and master reference dictionary * @@ -198,35 +177,6 @@ public class VCFUtils { return assembly; } - /** - * Read all of the VCF records from source into memory, returning the header and the VariantContexts - * - * @param source the file to read, must be in VCF4 format - * @return - * @throws java.io.IOException - */ - public static Pair> readVCF(final File source) throws IOException { - // read in the features - final List vcs = new ArrayList(); - final VCFCodec codec = new VCFCodec(); - PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source)); - FeatureCodecHeader header = codec.readHeader(pbs); - pbs.close(); - - pbs = new PositionalBufferedStream(new FileInputStream(source)); - pbs.skip(header.getHeaderEnd()); - - final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue(); - - while ( ! pbs.isDone() ) { - final VariantContext vc = codec.decode(pbs); - if ( vc != null ) - vcs.add(vc); - } - - return new Pair>(vcfHeader, vcs); - } - /** Only displays a warning if warnings are enabled and an identical warning hasn't been already issued */ private static final class HeaderConflictWarner { boolean emitWarnings; diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java index 6c0a18f1d..eec0f653a 100644 --- a/public/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.variant.bcf2.BCF2Utils; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; diff --git a/public/java/test/org/broadinstitute/sting/utils/MWUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/MWUnitTest.java index a56045165..9d4c562c7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/MWUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/MWUnitTest.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.variant.utils.Pair; +import org.broadinstitute.sting.utils.collections.Pair; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java index 35fa47ede..eb2eebd36 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java @@ -36,11 +36,9 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContextTestProvider; import org.broadinstitute.variant.vcf.VCFCodec; -import org.broadinstitute.variant.vcf.VCFHeader; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; @@ -252,13 +250,13 @@ public class BandPassActivityProfileUnitTest extends BaseTest { final File file = new File(path); final VCFCodec codec = new VCFCodec(); - final Pair> reader = VariantContextTestProvider.readAllVCs(file, codec); + final VariantContextTestProvider.VariantContextContainer reader = VariantContextTestProvider.readAllVCs(file, codec); final List incRegions = new ArrayList(); final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser); final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser); int pos = start; - for ( final VariantContext vc : reader.getSecond() ) { + for ( final VariantContext vc : reader.getVCs() ) { if ( vc == null ) continue; while ( pos < vc.getStart() ) { final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, pos); diff --git a/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java similarity index 79% rename from public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUtilsUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java index 9b24f64e7..6ff052bdc 100644 --- a/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variant/GATKVariantContextUtilsUnitTest.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2012 The Broad Institute -* +* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without @@ -9,10 +9,10 @@ * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: -* +* * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. -* +* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -23,33 +23,25 @@ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.variant.variantcontext; +package org.broadinstitute.sting.utils.variant; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broadinstitute.variant.VariantBaseTest; -import org.broadinstitute.variant.utils.GeneralUtils; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.variant.variantcontext.*; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.File; -import java.io.FileNotFoundException; import java.util.*; -public class VariantContextUtilsUnitTest extends VariantBaseTest { +public class GATKVariantContextUtilsUnitTest extends BaseTest { + Allele Aref, T, C, G, Cref, ATC, ATCATC; @BeforeSuite public void setup() { - final File referenceFile = new File(b37KGReference); - try { - IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile); - } - catch(FileNotFoundException ex) { - throw new RuntimeException(referenceFile.getAbsolutePath(),ex); - } - // alleles Aref = Allele.create("A", true); Cref = Allele.create("C", true); @@ -186,10 +178,10 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { final List priority = vcs2priority(inputs); - final VariantContext merged = VariantContextUtils.simpleMerge( + final VariantContext merged = GATKVariantContextUtils.simpleMerge( inputs, priority, - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); + GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); Assert.assertEquals(merged.getAlleles(), cfg.expected); } @@ -244,10 +236,10 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { inputs.add(new VariantContextBuilder(snpVC1).id(id).make()); } - final VariantContext merged = VariantContextUtils.simpleMerge( + final VariantContext merged = GATKVariantContextUtils.simpleMerge( inputs, null, - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false); + GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false); Assert.assertEquals(merged.getID(), cfg.expected); } @@ -261,14 +253,14 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { List inputs; VariantContext expected; String setExpected; - VariantContextUtils.FilteredRecordMergeType type; + GATKVariantContextUtils.FilteredRecordMergeType type; private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) { - this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected); + this(name, input1, input2, expected, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected); } - private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) { + private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, GATKVariantContextUtils.FilteredRecordMergeType type, String setExpected) { super(MergeFilteredTest.class, name); LinkedList all = new LinkedList(Arrays.asList(input1, input2)); this.expected = expected; @@ -288,66 +280,66 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_INTERSECTION); + GATKVariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("noFilters", makeVC("1", Arrays.asList(Aref, T), "."), makeVC("2", Arrays.asList(Aref, T), "."), makeVC("3", Arrays.asList(Aref, T), "."), - VariantContextUtils.MERGE_INTERSECTION); + GATKVariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("oneFiltered", makeVC("1", Arrays.asList(Aref, T), "."), makeVC("2", Arrays.asList(Aref, T), "FAIL"), makeVC("3", Arrays.asList(Aref, T), "."), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + String.format("1-%s2", GATKVariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("onePassOneFail", makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("2", Arrays.asList(Aref, T), "FAIL"), makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + String.format("1-%s2", GATKVariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("AllFiltered", makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "FAIL"), makeVC("3", Arrays.asList(Aref, T), "FAIL"), - VariantContextUtils.MERGE_FILTER_IN_ALL); + GATKVariantContextUtils.MERGE_FILTER_IN_ALL); // test ALL vs. ANY new MergeFilteredTest("FailOneUnfiltered", makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "."), makeVC("3", Arrays.asList(Aref, T), "."), - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + String.format("%s1-2", GATKVariantContextUtils.MERGE_FILTER_PREFIX)); new MergeFilteredTest("OneFailAllUnfilteredArg", makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "."), makeVC("3", Arrays.asList(Aref, T), "FAIL"), - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, - String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); + GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, + String.format("%s1-2", GATKVariantContextUtils.MERGE_FILTER_PREFIX)); // test excluding allele in filtered record new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords", makeVC("1", Arrays.asList(Aref, T), "."), makeVC("2", Arrays.asList(Aref, T), "FAIL"), makeVC("3", Arrays.asList(Aref, T), "."), - String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); + String.format("1-%s2", GATKVariantContextUtils.MERGE_FILTER_PREFIX)); // promotion of site from unfiltered to PASSES new MergeFilteredTest("UnfilteredPlusPassIsPass", makeVC("1", Arrays.asList(Aref, T), "."), makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_INTERSECTION); + GATKVariantContextUtils.MERGE_INTERSECTION); new MergeFilteredTest("RefInAll", makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), - VariantContextUtils.MERGE_REF_IN_ALL); + GATKVariantContextUtils.MERGE_REF_IN_ALL); new MergeFilteredTest("RefInOne", makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), @@ -361,8 +353,8 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { @Test(dataProvider = "mergeFiltered") public void testMergeFiltered(MergeFilteredTest cfg) { final List priority = vcs2priority(cfg.inputs); - final VariantContext merged = VariantContextUtils.simpleMerge( - cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + final VariantContext merged = GATKVariantContextUtils.simpleMerge( + cfg.inputs, priority, cfg.type, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); // test alleles are equal Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); @@ -487,9 +479,9 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { @Test(dataProvider = "mergeGenotypes") public void testMergeGenotypes(MergeGenotypesTest cfg) { - final VariantContext merged = VariantContextUtils.simpleMerge( - cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); + final VariantContext merged = GATKVariantContextUtils.simpleMerge( + cfg.inputs, cfg.priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); // test alleles are equal Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); @@ -528,9 +520,9 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, -1)); final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2)); - final VariantContext merged = VariantContextUtils.simpleMerge( - Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); + final VariantContext merged = GATKVariantContextUtils.simpleMerge( + Arrays.asList(vc1, vc2), null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); // test genotypes Assert.assertEquals(merged.getSampleNames(), new HashSet(Arrays.asList("s1.1", "s1.2"))); @@ -561,12 +553,12 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); - final VariantContext merged = VariantContextUtils.simpleMerge( - Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false); + final VariantContext merged = GATKVariantContextUtils.simpleMerge( + Arrays.asList(vc1, vc2), priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false); if ( annotate ) - Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION); + Assert.assertEquals(merged.getAttribute(set), GATKVariantContextUtils.MERGE_INTERSECTION); else Assert.assertFalse(merged.hasAttribute(set)); } @@ -583,78 +575,6 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { return priority; } - - // -------------------------------------------------------------------------------- - // - // Test repeats - // - // -------------------------------------------------------------------------------- - - private class RepeatDetectorTest extends TestDataProvider { - String ref; - boolean isTrueRepeat; - VariantContext vc; - - private RepeatDetectorTest(boolean isTrueRepeat, String ref, String refAlleleString, String ... altAlleleStrings) { - super(RepeatDetectorTest.class); - this.isTrueRepeat = isTrueRepeat; - this.ref = ref; - - List alleles = new LinkedList(); - final Allele refAllele = Allele.create(refAlleleString, true); - alleles.add(refAllele); - for ( final String altString: altAlleleStrings) { - final Allele alt = Allele.create(altString, false); - alleles.add(alt); - } - - VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, refAllele.length(), alleles); - this.vc = builder.make(); - } - - public String toString() { - return String.format("%s refBases=%s trueRepeat=%b vc=%s", super.toString(), ref, isTrueRepeat, vc); - } - } - - @DataProvider(name = "RepeatDetectorTest") - public Object[][] makeRepeatDetectorTest() { - new RepeatDetectorTest(true, "NAAC", "N", "NA"); - new RepeatDetectorTest(true, "NAAC", "NA", "N"); - new RepeatDetectorTest(false, "NAAC", "NAA", "N"); - new RepeatDetectorTest(false, "NAAC", "N", "NC"); - new RepeatDetectorTest(false, "AAC", "A", "C"); - - // running out of ref bases => false - new RepeatDetectorTest(false, "NAAC", "N", "NCAGTA"); - - // complex repeats - new RepeatDetectorTest(true, "NATATATC", "N", "NAT"); - new RepeatDetectorTest(true, "NATATATC", "N", "NATA"); - new RepeatDetectorTest(true, "NATATATC", "N", "NATAT"); - new RepeatDetectorTest(true, "NATATATC", "NAT", "N"); - new RepeatDetectorTest(false, "NATATATC", "NATA", "N"); - new RepeatDetectorTest(false, "NATATATC", "NATAT", "N"); - - // multi-allelic - new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATAT"); - new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATA"); - new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATAT"); - new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATA"); // two As - new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NATC"); // false - new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NCC"); // false - new RepeatDetectorTest(false, "NATATATC", "NAT", "NATAT", "NCC"); // false - - return RepeatDetectorTest.getTests(RepeatDetectorTest.class); - } - - @Test(dataProvider = "RepeatDetectorTest") - public void testRepeatDetectorTest(RepeatDetectorTest cfg) { - - // test alleles are equal - Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat); - } - // -------------------------------------------------------------------------------- // // basic allele clipping test @@ -698,10 +618,11 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { @Test(dataProvider = "ReverseClippingPositionTestProvider") public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { - int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); + int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); Assert.assertEquals(result, cfg.expectedClip); } + // -------------------------------------------------------------------------------- // // test splitting into bi-allelics @@ -776,7 +697,7 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { @Test(dataProvider = "SplitBiallelics") public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List expectedBiallelics) { - final List biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc); + final List biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vc); Assert.assertEquals(biallelics.size(), expectedBiallelics.size()); for ( int i = 0; i < biallelics.size(); i++ ) { final VariantContext actual = biallelics.get(i); @@ -790,14 +711,14 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { final List genotypes = new ArrayList(); int sampleI = 0; - for ( final List alleles : GeneralUtils.makePermutations(vc.getAlleles(), 2, true) ) { + for ( final List alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) { genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles)); } genotypes.add(GenotypeBuilder.createMissing("missing", 2)); final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make(); - final List biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes); + final List biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes); for ( int i = 0; i < biallelics.size(); i++ ) { final VariantContext actual = biallelics.get(i); Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples @@ -812,4 +733,159 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest { } } } -} \ No newline at end of file + + + // -------------------------------------------------------------------------------- + // + // Test repeats + // + // -------------------------------------------------------------------------------- + + private class RepeatDetectorTest extends TestDataProvider { + String ref; + boolean isTrueRepeat; + VariantContext vc; + + private RepeatDetectorTest(boolean isTrueRepeat, String ref, String refAlleleString, String ... altAlleleStrings) { + super(RepeatDetectorTest.class); + this.isTrueRepeat = isTrueRepeat; + this.ref = ref; + + List alleles = new LinkedList(); + final Allele refAllele = Allele.create(refAlleleString, true); + alleles.add(refAllele); + for ( final String altString: altAlleleStrings) { + final Allele alt = Allele.create(altString, false); + alleles.add(alt); + } + + VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, refAllele.length(), alleles); + this.vc = builder.make(); + } + + public String toString() { + return String.format("%s refBases=%s trueRepeat=%b vc=%s", super.toString(), ref, isTrueRepeat, vc); + } + } + + @DataProvider(name = "RepeatDetectorTest") + public Object[][] makeRepeatDetectorTest() { + new RepeatDetectorTest(true, "NAAC", "N", "NA"); + new RepeatDetectorTest(true, "NAAC", "NA", "N"); + new RepeatDetectorTest(false, "NAAC", "NAA", "N"); + new RepeatDetectorTest(false, "NAAC", "N", "NC"); + new RepeatDetectorTest(false, "AAC", "A", "C"); + + // running out of ref bases => false + new RepeatDetectorTest(false, "NAAC", "N", "NCAGTA"); + + // complex repeats + new RepeatDetectorTest(true, "NATATATC", "N", "NAT"); + new RepeatDetectorTest(true, "NATATATC", "N", "NATA"); + new RepeatDetectorTest(true, "NATATATC", "N", "NATAT"); + new RepeatDetectorTest(true, "NATATATC", "NAT", "N"); + new RepeatDetectorTest(false, "NATATATC", "NATA", "N"); + new RepeatDetectorTest(false, "NATATATC", "NATAT", "N"); + + // multi-allelic + new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATAT"); + new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATA"); + new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATAT"); + new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATA"); // two As + new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NATC"); // false + new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NCC"); // false + new RepeatDetectorTest(false, "NATATATC", "NAT", "NATAT", "NCC"); // false + + return RepeatDetectorTest.getTests(RepeatDetectorTest.class); + } + + @Test(dataProvider = "RepeatDetectorTest") + public void testRepeatDetectorTest(RepeatDetectorTest cfg) { + + // test alleles are equal + Assert.assertEquals(GATKVariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat); + } + + @Test + public void testRepeatAllele() { + Allele nullR = Allele.create("A", true); + Allele nullA = Allele.create("A", false); + Allele atc = Allele.create("AATC", false); + Allele atcatc = Allele.create("AATCATC", false); + Allele ccccR = Allele.create("ACCCC", true); + Allele cc = Allele.create("ACC", false); + Allele cccccc = Allele.create("ACCCCCC", false); + Allele gagaR = Allele.create("AGAGA", true); + Allele gagagaga = Allele.create("AGAGAGAGA", false); + + // - / ATC [ref] from 20-22 + String delLoc = "chr1"; + int delLocStart = 20; + int delLocStop = 22; + + // - [ref] / ATC from 20-20 + String insLoc = "chr1"; + int insLocStart = 20; + int insLocStop = 20; + + Pair,byte[]> result; + byte[] refBytes = "TATCATCATCGGA".getBytes(); + + Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes(), true),4); + Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes(), true),0); + Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes(), true),1); + Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes(), true),1); + Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes(), true),2); + + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4); + Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7); + + + // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 + VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make(); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); + Assert.assertEquals(result.getFirst().toArray()[0],3); + Assert.assertEquals(result.getFirst().toArray()[1],4); + Assert.assertEquals(result.getSecond().length,3); + + // ATC*,A,ATCATC + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make(); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); + Assert.assertEquals(result.getFirst().toArray()[0],3); + Assert.assertEquals(result.getFirst().toArray()[1],2); + Assert.assertEquals(result.getFirst().toArray()[2],4); + Assert.assertEquals(result.getSecond().length,3); + + // simple non-tandem deletion: CCCC*, - + refBytes = "TCCCCCCCCATG".getBytes(); + vc = new VariantContextBuilder("foo", delLoc, 10, 14, Arrays.asList(ccccR,nullA)).make(); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); + Assert.assertEquals(result.getFirst().toArray()[0],8); + Assert.assertEquals(result.getFirst().toArray()[1],4); + Assert.assertEquals(result.getSecond().length,1); + + // CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9 + refBytes = "TCCCCCCCAGAGAGAG".getBytes(); + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); + Assert.assertEquals(result.getFirst().toArray()[0],7); + Assert.assertEquals(result.getFirst().toArray()[1],5); + Assert.assertEquals(result.getFirst().toArray()[2],3); + Assert.assertEquals(result.getFirst().toArray()[3],9); + Assert.assertEquals(result.getSecond().length,1); + + // GAGA*,-,GAGAGAGA + refBytes = "TGAGAGAGAGATTT".getBytes(); + vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make(); + result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); + Assert.assertEquals(result.getFirst().toArray()[0],5); + Assert.assertEquals(result.getFirst().toArray()[1],3); + Assert.assertEquals(result.getFirst().toArray()[2],7); + Assert.assertEquals(result.getSecond().length,2); + + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/variant/VariantContextBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/variant/VariantContextBenchmark.java index 8a6d2138e..51a47d86d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variant/VariantContextBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/variant/VariantContextBenchmark.java @@ -231,9 +231,9 @@ public class VariantContextBenchmark extends SimpleBenchmark { toMerge.add(new VariantContextBuilder(vc).genotypes(gc).make()); } - VariantContextUtils.simpleMerge(toMerge, null, - VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, - VariantContextUtils.GenotypeMergeType.UNSORTED, + GATKVariantContextUtils.simpleMerge(toMerge, null, + GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + GATKVariantContextUtils.GenotypeMergeType.UNSORTED, true, false, "set", false, true); } }; diff --git a/public/java/test/org/broadinstitute/variant/VariantBaseTest.java b/public/java/test/org/broadinstitute/variant/VariantBaseTest.java index be70b22e6..0d7d5a82e 100644 --- a/public/java/test/org/broadinstitute/variant/VariantBaseTest.java +++ b/public/java/test/org/broadinstitute/variant/VariantBaseTest.java @@ -1,3 +1,28 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + package org.broadinstitute.variant; import org.testng.Assert; diff --git a/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextTestProvider.java index d739e4aa5..4c948e8e2 100644 --- a/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextTestProvider.java @@ -32,7 +32,6 @@ import org.broadinstitute.variant.VariantBaseTest; import org.broadinstitute.variant.bcf2.BCF2Codec; import org.broadinstitute.variant.utils.GeneralUtils; import org.broadinstitute.variant.vcf.*; -import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.variant.variantcontext.writer.Options; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.testng.Assert; @@ -74,6 +73,24 @@ public class VariantContextTestProvider { } } + public static class VariantContextContainer { + private VCFHeader header; + private Iterable vcs; + + public VariantContextContainer( VCFHeader header, Iterable vcs ) { + this.header = header; + this.vcs = vcs; + } + + public VCFHeader getHeader() { + return header; + } + + public Iterable getVCs() { + return vcs; + } + } + public abstract static class VariantContextIOTest { public String toString() { return "VariantContextIOTest:" + getExtension(); @@ -150,15 +167,15 @@ public class VariantContextTestProvider { if ( ENABLE_SOURCE_VCF_TESTS ) { for ( final File file : testSourceVCFs ) { VCFCodec codec = new VCFCodec(); - Pair> x = readAllVCs( file, codec ); + VariantContextContainer x = readAllVCs( file, codec ); List fullyDecoded = new ArrayList(); - for ( final VariantContext raw : x.getSecond() ) { + for ( final VariantContext raw : x.getVCs() ) { if ( raw != null ) - fullyDecoded.add(raw.fullyDecode(x.getFirst(), false)); + fullyDecoded.add(raw.fullyDecode(x.getHeader(), false)); } - TEST_DATAs.add(new VariantContextTestData(x.getFirst(), fullyDecoded)); + TEST_DATAs.add(new VariantContextTestData(x.getHeader(), fullyDecoded)); } } } @@ -616,8 +633,8 @@ public class VariantContextTestProvider { writeVCsToFile(writer, header, data.vcs); // ensure writing of expected == actual - final Pair> p = readAllVCs(tmpFile, tester.makeCodec()); - final Iterable actual = p.getSecond(); + final VariantContextContainer p = readAllVCs(tmpFile, tester.makeCodec()); + final Iterable actual = p.getVCs(); int i = 0; for ( final VariantContext readVC : actual ) { @@ -655,14 +672,14 @@ public class VariantContextTestProvider { writeVCsToFile(writer, header, vcs); // ensure writing of expected == actual - final Pair> p = readAllVCs(tmpFile, tester.makeCodec()); - final Iterable actual = p.getSecond(); + final VariantContextContainer p = readAllVCs(tmpFile, tester.makeCodec()); + final Iterable actual = p.getVCs(); assertEquals(actual, expected); if ( recurse ) { // if we are doing a recursive test, grab a fresh iterator over the written values - final Iterable read = readAllVCs(tmpFile, tester.makeCodec()).getSecond(); - testReaderWriter(tester, p.getFirst(), expected, read, false); + final Iterable read = readAllVCs(tmpFile, tester.makeCodec()).getVCs(); + testReaderWriter(tester, p.getHeader(), expected, read, false); } } @@ -683,7 +700,7 @@ public class VariantContextTestProvider { * @return * @throws IOException */ - public final static Pair> readAllVCs( final File source, final FeatureCodec codec ) throws IOException { + public final static VariantContextContainer readAllVCs( final File source, final FeatureCodec codec ) throws IOException { // read in the features PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source)); FeatureCodecHeader header = codec.readHeader(pbs); @@ -693,7 +710,7 @@ public class VariantContextTestProvider { pbs.skip(header.getHeaderEnd()); final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue(); - return new Pair>(vcfHeader, new VCIterable(pbs, codec, vcfHeader)); + return new VariantContextContainer(vcfHeader, new VCIterable(pbs, codec, vcfHeader)); } public static class VCIterable implements Iterable, Iterator { @@ -738,10 +755,10 @@ public class VariantContextTestProvider { } public static void assertVCFandBCFFilesAreTheSame(final File vcfFile, final File bcfFile) throws IOException { - final Pair> vcfData = readAllVCs(vcfFile, new VCFCodec()); - final Pair> bcfData = readAllVCs(bcfFile, new BCF2Codec()); - assertEquals(bcfData.getFirst(), vcfData.getFirst()); - assertEquals(bcfData.getSecond(), vcfData.getSecond()); + final VariantContextContainer vcfData = readAllVCs(vcfFile, new VCFCodec()); + final VariantContextContainer bcfData = readAllVCs(bcfFile, new BCF2Codec()); + assertEquals(bcfData.getHeader(), vcfData.getHeader()); + assertEquals(bcfData.getVCs(), vcfData.getVCs()); } public static void assertEquals(final Iterable actual, final Iterable expected) { diff --git a/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUnitTest.java index 25e9878ae..103c8ab3b 100644 --- a/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/variant/variantcontext/VariantContextUnitTest.java @@ -28,9 +28,7 @@ package org.broadinstitute.variant.variantcontext; // the imports for unit testing. - import org.broadinstitute.variant.VariantBaseTest; -import org.broadinstitute.variant.utils.Pair; import org.testng.annotations.BeforeSuite; import org.testng.annotations.BeforeMethod; import org.testng.annotations.DataProvider; @@ -484,78 +482,6 @@ public class VariantContextUnitTest extends VariantBaseTest { Assert.assertNotNull(vc.getFiltersMaybeNull()); } - @Test - public void testRepeatAllele() { - Allele nullR = Allele.create("A", true); - Allele nullA = Allele.create("A", false); - Allele atc = Allele.create("AATC", false); - Allele atcatc = Allele.create("AATCATC", false); - Allele ccccR = Allele.create("ACCCC", true); - Allele cc = Allele.create("ACC", false); - Allele cccccc = Allele.create("ACCCCCC", false); - Allele gagaR = Allele.create("AGAGA", true); - Allele gagagaga = Allele.create("AGAGAGAGA", false); - - Pair,byte[]> result; - byte[] refBytes = "TATCATCATCGGA".getBytes(); - - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes(), true),4); - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes(), true),0); - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes(), true),1); - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes(), true),1); - Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes(), true),2); - - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4); - Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7); - - - // A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4 - VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); - Assert.assertEquals(result.getFirst().toArray()[0],3); - Assert.assertEquals(result.getFirst().toArray()[1],4); - Assert.assertEquals(result.getSecond().length,3); - - // ATC*,A,ATCATC - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); - Assert.assertEquals(result.getFirst().toArray()[0],3); - Assert.assertEquals(result.getFirst().toArray()[1],2); - Assert.assertEquals(result.getFirst().toArray()[2],4); - Assert.assertEquals(result.getSecond().length,3); - - // simple non-tandem deletion: CCCC*, - - refBytes = "TCCCCCCCCATG".getBytes(); - vc = new VariantContextBuilder("foo", delLoc, 10, 14, Arrays.asList(ccccR,nullA)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); - Assert.assertEquals(result.getFirst().toArray()[0],8); - Assert.assertEquals(result.getFirst().toArray()[1],4); - Assert.assertEquals(result.getSecond().length,1); - - // CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9 - refBytes = "TCCCCCCCAGAGAGAG".getBytes(); - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); - Assert.assertEquals(result.getFirst().toArray()[0],7); - Assert.assertEquals(result.getFirst().toArray()[1],5); - Assert.assertEquals(result.getFirst().toArray()[2],3); - Assert.assertEquals(result.getFirst().toArray()[3],9); - Assert.assertEquals(result.getSecond().length,1); - - // GAGA*,-,GAGAGAGA - refBytes = "TGAGAGAGAGATTT".getBytes(); - vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make(); - result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes); - Assert.assertEquals(result.getFirst().toArray()[0],5); - Assert.assertEquals(result.getFirst().toArray()[1],3); - Assert.assertEquals(result.getFirst().toArray()[2],7); - Assert.assertEquals(result.getSecond().length,2); - - } @Test public void testGetGenotypeCounts() { List alleles = Arrays.asList(Aref, T);