Move some VCF/VariantContext methods back to the GATK based on feedback

-Moved some of the more specialized / complex VariantContext and VCF utility
 methods back to the GATK.

-Due to this re-shuffling, was able to return things like the Pair class back
 to the GATK as well.
This commit is contained in:
David Roazen 2013-01-29 13:57:39 -05:00
parent 581df64197
commit a536e1da84
87 changed files with 1408 additions and 1386 deletions

View File

@ -57,7 +57,7 @@ import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.MannWhitneyU;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Allele;

View File

@ -53,8 +53,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.vcf.VCFHeaderLineCount; import org.broadinstitute.variant.vcf.VCFHeaderLineCount;
import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
@ -79,7 +79,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
if ( !vc.isIndel()) if ( !vc.isIndel())
return null; return null;
Pair<List<Integer>,byte[]> result = VariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases()); Pair<List<Integer>,byte[]> result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
if (result == null) if (result == null)
return null; return null;

View File

@ -63,7 +63,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

View File

@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -46,7 +46,7 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads; package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -53,7 +53,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler; import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;

View File

@ -53,7 +53,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -288,7 +289,7 @@ public class ConsensusAlleleCounter {
if (vcs.isEmpty()) if (vcs.isEmpty())
return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion
final VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false); final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(vcs, null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
return mergedVC.getAlleles(); return mergedVC.getAlleles();
} }
} }

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -56,7 +56,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;

View File

@ -49,8 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.utils.pairhmm.PairHMM; import org.broadinstitute.sting.utils.pairhmm.PairHMM;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection { public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
@ -172,7 +172,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
*/ */
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false) @Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
public int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY; public int samplePloidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
@Hidden @Hidden
@Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false) @Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false)

View File

@ -61,7 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.variant.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@ -304,7 +304,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site")); headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
// add the pool values for each genotype // add the pool values for each genotype
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) { if (UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY) {
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample")); headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample")); headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
} }

View File

@ -61,6 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -134,7 +135,7 @@ public class UnifiedGenotyperEngine {
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"}) @Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY); this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), GATKVariantContextUtils.DEFAULT_PLOIDY);
} }
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"}) @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
@ -525,7 +526,7 @@ public class UnifiedGenotyperEngine {
// if we are subsetting alleles (either because there were too many or because some were not polymorphic) // if we are subsetting alleles (either because there were too many or because some were not polymorphic)
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end). // then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall); vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
@ -662,7 +663,7 @@ public class UnifiedGenotyperEngine {
private void determineGLModelsToUse() { private void determineGLModelsToUse() {
String modelPrefix = ""; String modelPrefix = "";
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY ) if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY )
modelPrefix = GPSTRING; modelPrefix = GPSTRING;
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) { if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {

View File

@ -47,6 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
import java.util.*; import java.util.*;
@ -105,7 +106,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
alleles.add(vc.getReference()); alleles.add(vc.getReference());
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles())); alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
builder.alleles(alleles); builder.alleles(alleles);
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false)); builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
return builder.make(); return builder.make();
} else { } else {
return vc; return vc;
@ -351,6 +352,6 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
final List<Allele> allelesToUse, final List<Allele> allelesToUse,
final boolean assignGenotypes, final boolean assignGenotypes,
final int ploidy) { final int ploidy) {
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes); return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
} }
} }

View File

@ -47,10 +47,10 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypesContext; import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.ArrayList; import java.util.ArrayList;
@ -92,7 +92,7 @@ abstract class ExactAFCalc extends AFCalc {
if ( sample.hasLikelihoods() ) { if ( sample.hasLikelihoods() ) {
double[] gls = sample.getLikelihoods().getAsVector(); double[] gls = sample.getLikelihoods().getAsVector();
if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL ) if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
genotypeLikelihoods.add(gls); genotypeLikelihoods.add(gls);
} }
} }

View File

@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods; import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
@ -553,7 +554,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
} }
// if there is no mass on the (new) likelihoods, then just no-call the sample // if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) { if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES)); newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
} }
else { else {
@ -565,7 +566,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
gb.PL(newLikelihoods); gb.PL(newLikelihoods);
// if we weren't asked to assign a genotype, then just no-call the sample // if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
gb.alleles(NO_CALL_ALLELES); gb.alleles(NO_CALL_ALLELES);
else else
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy); assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);

View File

@ -47,7 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc; package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;

View File

@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
@ -173,7 +174,7 @@ public class GenotypingEngine {
validatePriorityList( priorityList, eventsAtThisLoc ); validatePriorityList( priorityList, eventsAtThisLoc );
// Merge the event to find a common reference representation // Merge the event to find a common reference representation
final VariantContext mergedVC = VariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
if( mergedVC == null ) { continue; } if( mergedVC == null ) { continue; }
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
@ -203,7 +204,7 @@ public class GenotypingEngine {
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call); VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary! if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
annotatedCall = VariantContextUtils.reverseTrimAlleles(annotatedCall); annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
} }
returnCalls.add( annotatedCall ); returnCalls.add( annotatedCall );

View File

@ -72,6 +72,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
@ -297,7 +298,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
samplesList.addAll( samples ); samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model // initialize the UnifiedGenotyper Engine which is used to call into the exact model
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC); UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
@ -307,7 +308,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
simpleUAC.CONTAMINATION_FRACTION = 0.0; simpleUAC.CONTAMINATION_FRACTION = 0.0;
simpleUAC.exactCallsLog = null; simpleUAC.exactCallsLog = null;
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
// initialize the output VCF header // initialize the output VCF header
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());

View File

@ -63,7 +63,7 @@ import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;

View File

@ -59,6 +59,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
@ -444,7 +445,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
ArrayList<String> rodNames = new ArrayList<String>(); ArrayList<String> rodNames = new ArrayList<String>();
rodNames.add(variantCollection.variants.getName()); rodNames.add(variantCollection.variants.getName());
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
//Get the trios from the families passed as ped //Get the trios from the families passed as ped
setTrios(); setTrios();

View File

@ -58,12 +58,12 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.vcf.VCFUtils; import org.broadinstitute.variant.vcf.VCFUtils;
@ -327,7 +327,7 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
// Initialize VCF header // Initialize VCF header
if (vcfWriter != null) { if (vcfWriter != null) {
Map<String, VCFHeader> header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName()); Map<String, VCFHeader> header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName());
samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); samples = SampleUtils.getSampleList(header, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), true); Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), true);
headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate")); headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); vcfWriter.writeHeader(new VCFHeader(headerLines, samples));

View File

@ -54,12 +54,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import java.io.File; import java.io.File;
@ -227,7 +227,7 @@ public class ValidationSiteSelector extends RodWalker<Integer, Integer> {
public void initialize() { public void initialize() {
// Get list of samples to include in the output // Get list of samples to include in the output
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit()); Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit());
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions); Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);

View File

@ -61,6 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
@ -115,7 +116,7 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
String trackName = variantCollection.variants.getName(); String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY); UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName))); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));

View File

@ -51,7 +51,7 @@ import com.google.java.contract.Requires;
import org.apache.commons.math.MathException; import org.apache.commons.math.MathException;
import org.apache.commons.math.stat.inference.ChiSquareTestImpl; import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collection; import java.util.Collection;

View File

@ -58,7 +58,7 @@ import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray; import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate; import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;

View File

@ -51,9 +51,9 @@ import com.google.java.contract.Requires;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates; import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
@ -112,7 +112,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
// get backward repeat unit and # repeats // get backward repeat unit and # repeats
byte[] backwardRepeatUnit = Arrays.copyOfRange(readBases, offset - str + 1, offset + 1); byte[] backwardRepeatUnit = Arrays.copyOfRange(readBases, offset - str + 1, offset + 1);
maxBW = VariantContextUtils.findNumberofRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false); maxBW = GATKVariantContextUtils.findNumberofRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
if (maxBW > 1) { if (maxBW > 1) {
bestBWRepeatUnit = backwardRepeatUnit.clone(); bestBWRepeatUnit = backwardRepeatUnit.clone();
break; break;
@ -132,7 +132,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
// get forward repeat unit and # repeats // get forward repeat unit and # repeats
byte[] forwardRepeatUnit = Arrays.copyOfRange(readBases, offset +1, offset+str+1); byte[] forwardRepeatUnit = Arrays.copyOfRange(readBases, offset +1, offset+str+1);
maxFW = VariantContextUtils.findNumberofRepetitions(forwardRepeatUnit,Arrays.copyOfRange(readBases, offset+1, readBases.length), true); maxFW = GATKVariantContextUtils.findNumberofRepetitions(forwardRepeatUnit, Arrays.copyOfRange(readBases, offset + 1, readBases.length), true);
if (maxFW > 1) { if (maxFW > 1) {
bestFWRepeatUnit = forwardRepeatUnit.clone(); bestFWRepeatUnit = forwardRepeatUnit.clone();
break; break;
@ -150,7 +150,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
// but correct representation at that place might be (C)4. // but correct representation at that place might be (C)4.
// Hence, if the FW and BW units don't match, check if BW unit can still be a part of FW unit and add // Hence, if the FW and BW units don't match, check if BW unit can still be a part of FW unit and add
// representations to total // representations to total
maxBW = VariantContextUtils.findNumberofRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false); maxBW = GATKVariantContextUtils.findNumberofRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
maxRL = maxFW + maxBW; maxRL = maxFW + maxBW;
bestRepeatUnit = bestFWRepeatUnit; bestRepeatUnit = bestFWRepeatUnit;

View File

@ -48,18 +48,6 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
public class RepeatUnitAndLengthCovariate extends RepeatCovariate { public class RepeatUnitAndLengthCovariate extends RepeatCovariate {

View File

@ -46,20 +46,6 @@
package org.broadinstitute.sting.utils.recalibration.covariates; package org.broadinstitute.sting.utils.recalibration.covariates;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/** /**
* Created with IntelliJ IDEA. * Created with IntelliJ IDEA.
* User: rpoplin * User: rpoplin

View File

@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
import org.testng.Assert; import org.testng.Assert;

View File

@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.testng.Assert; import org.testng.Assert;

View File

@ -52,7 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;

View File

@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Allele;

View File

@ -46,21 +46,17 @@
package org.broadinstitute.sting.utils.recalibration; package org.broadinstitute.sting.utils.recalibration;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection; import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.covariates.*; import org.broadinstitute.sting.utils.recalibration.covariates.*;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Random; import java.util.Random;
@ -89,38 +85,38 @@ public class RepeatCovariatesUnitTest {
@Test(enabled = true) @Test(enabled = true)
public void testFindNumberOfRepetitions() { public void testFindNumberOfRepetitions() {
// First, test logic to compute number of repetitions of a substring on a given string. // First, test logic to compute number of repetitions of a substring on a given string.
int result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), true); int result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), true);
Assert.assertEquals(2,result); Assert.assertEquals(2,result);
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACAC".getBytes(), true); result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), true);
Assert.assertEquals(4,result); Assert.assertEquals(4,result);
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACACGT".getBytes(), true); result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), true);
Assert.assertEquals(4,result); Assert.assertEquals(4,result);
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"GTACACACAC".getBytes(), true); result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), true);
Assert.assertEquals(0,result); Assert.assertEquals(0,result);
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGT".getBytes(), true); result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), true);
Assert.assertEquals(0,result); Assert.assertEquals(0,result);
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GCAGCAGTAGGGTGTACACACAC".getBytes(), true); result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), true);
Assert.assertEquals(1,result); Assert.assertEquals(1,result);
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GTAGGGTGTACACACACGCAGCAT".getBytes(), true); result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), true);
Assert.assertEquals(0,result); Assert.assertEquals(0,result);
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true); result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true);
Assert.assertEquals(0,result); Assert.assertEquals(0,result);
// Same tests but looking backward on string // Same tests but looking backward on string
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACAC".getBytes(), false); result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), false);
Assert.assertEquals(2,result); Assert.assertEquals(2,result);
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACAC".getBytes(), false); result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), false);
Assert.assertEquals(4,result); Assert.assertEquals(4,result);
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACACGT".getBytes(), false); result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), false);
Assert.assertEquals(0,result); Assert.assertEquals(0,result);
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"GTACACACAC".getBytes(), false); result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), false);
Assert.assertEquals(4,result); Assert.assertEquals(4,result);
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGT".getBytes(), false); result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), false);
Assert.assertEquals(0,result); Assert.assertEquals(0,result);
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GCAGCAGTAGGGTGTACACACAC".getBytes(), false); result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), false);
Assert.assertEquals(0,result); Assert.assertEquals(0,result);
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GTAGGGTGTACACACACGCAGCAT".getBytes(), false); result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), false);
Assert.assertEquals(0,result); Assert.assertEquals(0,result);
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false); result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false);
Assert.assertEquals(3,result); Assert.assertEquals(3,result);
// test logic to get repeat unit and number of repeats from covariate value // test logic to get repeat unit and number of repeats from covariate value
@ -208,8 +204,8 @@ public class RepeatCovariatesUnitTest {
Assert.assertEquals(rurlValM,rurlValI); Assert.assertEquals(rurlValM,rurlValI);
int fw = VariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(offset+1,readLength).getBytes(),true); int fw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(offset+1,readLength).getBytes(),true);
int bw = VariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(0,offset+1).getBytes(),false); int bw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(0,offset+1).getBytes(),false);
Assert.assertEquals(Math.min(fw+bw,RepeatCovariate.MAX_REPEAT_LENGTH),(int)Integer.valueOf(rlValM)); Assert.assertEquals(Math.min(fw+bw,RepeatCovariate.MAX_REPEAT_LENGTH),(int)Integer.valueOf(rlValM));
} }

View File

@ -31,7 +31,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.ApplicationDetails;

View File

@ -26,7 +26,7 @@
package org.broadinstitute.sting.gatk.datasources.providers; package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*; import java.util.*;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList; import java.util.ArrayList;

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature; import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
import java.util.*; import java.util.*;
@ -200,7 +201,7 @@ public class VariantContextAdaptors {
if ( isSNP(dbsnp) || isMNP(dbsnp) ) if ( isSNP(dbsnp) || isMNP(dbsnp) )
addPaddingBase = false; addPaddingBase = false;
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
addPaddingBase = refBaseIsDash || VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp))); addPaddingBase = refBaseIsDash || GATKVariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
else else
return null; // can't handle anything else return null; // can't handle anything else

View File

@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.file.FSLockWithShared; import org.broadinstitute.sting.utils.file.FSLockWithShared;

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.recalibration.BQSRMode; import org.broadinstitute.sting.utils.recalibration.BQSRMode;

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
@ -249,7 +250,7 @@ public class VariantAnnotatorEngine {
private VariantContext annotateDBs(final RefMetaDataTracker tracker, final GenomeLoc loc, VariantContext vc, final Map<String, Object> infoAnnotations) { private VariantContext annotateDBs(final RefMetaDataTracker tracker, final GenomeLoc loc, VariantContext vc, final Map<String, Object> infoAnnotations) {
for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) { for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) { if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType()); final String rsID = GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType());
// add the ID if appropriate // add the ID if appropriate
if ( rsID != null ) { if ( rsID != null ) {

View File

@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream; import java.io.PrintStream;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream; import java.io.PrintStream;

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream; import java.io.PrintStream;

View File

@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream; import java.io.PrintStream;

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.Collections; import java.util.Collections;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -45,7 +45,7 @@ import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ClippingOp; import org.broadinstitute.sting.utils.clipping.ClippingOp;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation; import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -49,6 +49,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.VariantEvalUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -197,7 +198,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(shortName="ploidy", fullName="samplePloidy", doc="Per-sample ploidy (number of chromosomes per sample)", required=false) @Argument(shortName="ploidy", fullName="samplePloidy", doc="Per-sample ploidy (number of chromosomes per sample)", required=false)
protected int ploidy = VariantContextUtils.DEFAULT_PLOIDY; protected int ploidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
private File ancestralAlignmentsFile = null; private File ancestralAlignmentsFile = null;
@ -285,7 +286,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
// Now that we have all the rods categorized, determine the sample list from the eval rods. // Now that we have all the rods categorized, determine the sample list from the eval rods.
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), evals); Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), evals);
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE); Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
// Load the sample list, using an intermediate tree set to sort the samples // Load the sample list, using an intermediate tree set to sort the samples
final Set<String> allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples); final Set<String> allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples);

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner; import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext; import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;

View File

@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@ -51,7 +51,7 @@ public class TandemRepeat extends VariantStratifier {
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
if ( eval == null || ! eval.isIndel() ) if ( eval == null || ! eval.isIndel() )
return ALL; return ALL;
else if ( VariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) { else if ( GATKVariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
print("REPEAT", eval, ref); print("REPEAT", eval, ref);
return REPEAT; return REPEAT;
} else { } else {

View File

@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*; import java.util.*;

View File

@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@ -135,14 +136,14 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
protected VariantContextWriter vcfWriter = null; protected VariantContextWriter vcfWriter = null;
@Argument(shortName="genotypeMergeOptions", doc="Determines how we should merge genotype records for samples shared across the ROD files", required=false) @Argument(shortName="genotypeMergeOptions", doc="Determines how we should merge genotype records for samples shared across the ROD files", required=false)
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = null; public GATKVariantContextUtils.GenotypeMergeType genotypeMergeOption = null;
@Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false) @Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false)
public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED; public GATKVariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
@Hidden @Hidden
@Argument(shortName="multipleAllelesMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required=false) @Argument(shortName="multipleAllelesMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required=false)
public VariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = VariantContextUtils.MultipleAllelesMergeType.BY_TYPE; public GATKVariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
/** /**
* Used when taking the union of variants that contain genotypes. A complete priority list MUST be provided. * Used when taking the union of variants that contain genotypes. A complete priority list MUST be provided.
@ -203,12 +204,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
validateAnnotateUnionArguments(); validateAnnotateUnionArguments();
if ( PRIORITY_STRING == null && genotypeMergeOption == null) { if ( PRIORITY_STRING == null && genotypeMergeOption == null) {
genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED; genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED;
//PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12) //PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12)
logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority); logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority);
} }
if (genotypeMergeOption == VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE && if (genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE &&
!SampleUtils.verifyUniqueSamplesNames(vcfRods)) !SampleUtils.verifyUniqueSamplesNames(vcfRods))
throw new IllegalStateException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered."); throw new IllegalStateException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered.");
@ -232,7 +233,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
private void validateAnnotateUnionArguments() { private void validateAnnotateUnionArguments() {
Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null); Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null);
if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null ) if ( genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes"); throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes");
if ( PRIORITY_STRING != null){ if ( PRIORITY_STRING != null){
@ -278,7 +279,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
List<VariantContext> mergedVCs = new ArrayList<VariantContext>(); List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) { if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
// TODO -- clean this up in a refactoring // TODO -- clean this up in a refactoring
@ -296,13 +297,13 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
// iterate over the types so that it's deterministic // iterate over the types so that it's deterministic
for (VariantContext.Type type : VariantContext.Type.values()) { for (VariantContext.Type type : VariantContext.Type.values()) {
if (VCsByType.containsKey(type)) if (VCsByType.containsKey(type))
mergedVCs.add(VariantContextUtils.simpleMerge(VCsByType.get(type), mergedVCs.add(GATKVariantContextUtils.simpleMerge(VCsByType.get(type),
priority, rodNames.size() , filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
} }
} }
else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) { else if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
mergedVCs.add(VariantContextUtils.simpleMerge(vcs, mergedVCs.add(GATKVariantContextUtils.simpleMerge(vcs,
priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
} }

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport; import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable; import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeader;

View File

@ -39,12 +39,12 @@ import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.text.ListFileUtils; import org.broadinstitute.sting.utils.text.ListFileUtils;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.io.File; import java.io.File;
import java.util.*; import java.util.*;
@ -204,7 +204,7 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
} }
} }
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
VCFHeader vcfHeader = new VCFHeader(headerLines, vcfSamples); VCFHeader vcfHeader = new VCFHeader(headerLines, vcfSamples);
vcfHeader.setWriteEngineHeaders(includeEngineHeaders); vcfHeader.setWriteEngineHeaders(includeEngineHeaders);
vcfWriter.writeHeader(vcfHeader); vcfWriter.writeHeader(vcfHeader);

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@ -337,7 +338,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
List<String> rodNames = Arrays.asList(variantCollection.variants.getName()); List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames); vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles); Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions); Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
@ -661,7 +662,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate) // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate)
if ( vc.getAlleles().size() != sub.getAlleles().size() ) if ( vc.getAlleles().size() != sub.getAlleles().size() )
newGC = VariantContextUtils.stripPLsAndAD(sub.getGenotypes()); newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());
// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags // if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
if ( vc.getNSamples() != sub.getNSamples() ) { if ( vc.getNSamples() != sub.getNSamples() ) {

View File

@ -35,13 +35,13 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.*; import java.util.*;
@ -256,7 +256,7 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
//if ( popFile != null ) { //if ( popFile != null ) {
// throw new StingException("We still need to implement this!"); // throw new StingException("We still need to implement this!");
//} else { //} else {
return VariantContextUtils.computeHardyWeinbergPvalue(vc); return GATKVariantContextUtils.computeHardyWeinbergPvalue(vc);
//} //}
} }

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@ -180,7 +181,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
if ( !genotypeFieldsToTake.isEmpty() ) { if ( !genotypeFieldsToTake.isEmpty() ) {
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants); Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants);
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE)); TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
samples.addAll(vcfSamples); samples.addAll(vcfSamples);
// optimization: if there are no samples, we don't have to worry about any genotype fields // optimization: if there are no samples, we don't have to worry about any genotype fields

View File

@ -119,7 +119,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
return 0; return 0;
String rsID = dbsnp == null ? null : VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP); String rsID = dbsnp == null ? null : GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP);
Collection<VariantContext> contexts = getVariantContexts(tracker, ref); Collection<VariantContext> contexts = getVariantContexts(tracker, ref);

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.variant.bcf2.BCF2Codec; import org.broadinstitute.variant.bcf2.BCF2Codec;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.variant.vcf.VCFCodec;
import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;

View File

@ -33,7 +33,7 @@ import org.apache.commons.math.MathException;
import org.apache.commons.math.distribution.NormalDistribution; import org.apache.commons.math.distribution.NormalDistribution;
import org.apache.commons.math.distribution.NormalDistributionImpl; import org.apache.commons.math.distribution.NormalDistributionImpl;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import java.io.Serializable; import java.io.Serializable;

View File

@ -28,7 +28,7 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.Cigar; import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement; import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.*; import java.util.*;

View File

@ -29,11 +29,11 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.sting.utils.text.ListFileUtils; import org.broadinstitute.sting.utils.text.ListFileUtils;
import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
@ -117,15 +117,15 @@ public class SampleUtils {
} }
public static Set<String> getSampleList(Map<String, VCFHeader> headers) { public static Set<String> getSampleList(Map<String, VCFHeader> headers) {
return getSampleList(headers, VariantContextUtils.GenotypeMergeType.PRIORITIZE); return getSampleList(headers, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE);
} }
public static Set<String> getSampleList(Map<String, VCFHeader> headers, VariantContextUtils.GenotypeMergeType mergeOption) { public static Set<String> getSampleList(Map<String, VCFHeader> headers, GATKVariantContextUtils.GenotypeMergeType mergeOption) {
Set<String> samples = new TreeSet<String>(); Set<String> samples = new TreeSet<String>();
for ( Map.Entry<String, VCFHeader> val : headers.entrySet() ) { for ( Map.Entry<String, VCFHeader> val : headers.entrySet() ) {
VCFHeader header = val.getValue(); VCFHeader header = val.getValue();
for ( String sample : header.getGenotypeSamples() ) { for ( String sample : header.getGenotypeSamples() ) {
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == VariantContextUtils.GenotypeMergeType.UNIQUIFY)); samples.add(GATKVariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY));
} }
} }

View File

@ -32,7 +32,7 @@ import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMUtils; import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -23,7 +23,7 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE. * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.variant.utils; package org.broadinstitute.sting.utils.collections;
public class Pair<X,Y> { public class Pair<X,Y> {

View File

@ -29,7 +29,7 @@ import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -30,7 +30,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -37,7 +37,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.text.XReadLines;

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.utils.BaseUtils; import org.broadinstitute.variant.utils.BaseUtils;

View File

@ -31,7 +31,7 @@ import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
@ -147,4 +147,49 @@ public class GATKVCFUtils {
return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary()); return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary());
} }
public static String rsIDOfFirstRealVariant(List<VariantContext> VCs, VariantContext.Type type) {
if ( VCs == null )
return null;
String rsID = null;
for ( VariantContext vc : VCs ) {
if ( vc.getType() == type ) {
rsID = vc.getID();
break;
}
}
return rsID;
}
/**
* Read all of the VCF records from source into memory, returning the header and the VariantContexts
*
* SHOULD ONLY BE USED FOR UNIT/INTEGRATION TESTING PURPOSES!
*
* @param source the file to read, must be in VCF4 format
* @return
* @throws java.io.IOException
*/
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
// read in the features
final List<VariantContext> vcs = new ArrayList<VariantContext>();
final VCFCodec codec = new VCFCodec();
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
FeatureCodecHeader header = codec.readHeader(pbs);
pbs.close();
pbs = new PositionalBufferedStream(new FileInputStream(source));
pbs.skip(header.getHeaderEnd());
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
while ( ! pbs.isDone() ) {
final VariantContext vc = codec.decode(pbs);
if ( vc != null )
vcs.add(vc);
}
return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
}
} }

View File

@ -25,12 +25,79 @@
package org.broadinstitute.sting.utils.variant; package org.broadinstitute.sting.utils.variant;
import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFConstants;
import java.io.Serializable;
import java.util.*;
public class GATKVariantContextUtils { public class GATKVariantContextUtils {
private static Logger logger = Logger.getLogger(GATKVariantContextUtils.class);
public static final int DEFAULT_PLOIDY = 2;
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
public final static String MERGE_FILTER_PREFIX = "filterIn";
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_INTERSECTION = "Intersection";
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
*/
UNIQUIFY,
/**
* Take genotypes in priority order (see the priority argument).
*/
PRIORITIZE,
/**
* Take the genotypes in any order.
*/
UNSORTED,
/**
* Require that all samples/genotypes be unique between all inputs.
*/
REQUIRE_UNIQUE
}
public enum FilteredRecordMergeType {
/**
* Union - leaves the record if any record is unfiltered.
*/
KEEP_IF_ANY_UNFILTERED,
/**
* Requires all records present at site to be unfiltered. VCF files that don't contain the record don't influence this.
*/
KEEP_IF_ALL_UNFILTERED,
/**
* If any record is present at this site (regardless of possibly being filtered), then all such records are kept and the filters are reset.
*/
KEEP_UNCONDITIONAL
}
public enum MultipleAllelesMergeType {
/**
* Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record.
*/
BY_TYPE,
/**
* Merge all allele types at the same start position into the same VCF record.
*/
MIX_TYPES
}
/** /**
* create a genome location, given a variant context * create a genome location, given a variant context
* @param genomeLocParser parser * @param genomeLocParser parser
@ -41,4 +108,885 @@ public class GATKVariantContextUtils {
return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true); return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true);
} }
/**
* Returns true iff VC is an non-complex indel where every allele represents an expansion or
* contraction of a series of identical bases in the reference.
*
* For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT
*
* If VC = -/CT, then this function returns true because the CT insertion matches exactly the
* upcoming reference.
* If VC = -/CTA then this function returns false because the CTA isn't a perfect match
*
* Now consider deletions:
*
* If VC = CT/- then again the same logic applies and this returns true
* The case of CTA/- makes no sense because it doesn't actually match the reference bases.
*
* The logic of this function is pretty simple. Take all of the non-null alleles in VC. For
* each insertion allele of n bases, check if that allele matches the next n reference bases.
* For each deletion allele of n bases, check if this matches the reference bases at n - 2 n,
* as it must necessarily match the first n bases. If this test returns true for all
* alleles you are a tandem repeat, otherwise you are not.
*
* @param vc
* @param refBasesStartingAtVCWithPad not this is assumed to include the PADDED reference
* @return
*/
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
if ( ! vc.isIndel() ) // only indels are tandem repeats
return false;
final Allele ref = vc.getReference();
for ( final Allele allele : vc.getAlternateAlleles() ) {
if ( ! isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad) )
return false;
}
// we've passed all of the tests, so we are a repeat
return true;
}
/**
*
* @param vc
* @param refBasesStartingAtVCWithPad
* @return
*/
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
public static Pair<List<Integer>,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
final boolean VERBOSE = false;
final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
if ( ! vc.isIndel() ) // only indels are tandem repeats
return null;
final Allele refAllele = vc.getReference();
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
byte[] repeatUnit = null;
final ArrayList<Integer> lengths = new ArrayList<Integer>();
for ( final Allele allele : vc.getAlternateAlleles() ) {
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
final int[] repetitionCount = result.first;
// repetition count = 0 means allele is not a tandem expansion of context
if (repetitionCount[0] == 0 || repetitionCount[1] == 0)
return null;
if (lengths.size() == 0) {
lengths.add(repetitionCount[0]); // add ref allele length only once
}
lengths.add(repetitionCount[1]); // add this alt allele's length
repeatUnit = result.second;
if (VERBOSE) {
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
System.out.println("RU:"+new String(repeatUnit));
}
}
return new Pair<List<Integer>, byte[]>(lengths,repeatUnit);
}
public static Pair<int[],byte[]> getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) {
/* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units.
Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)2.
*/
byte[] longB;
// find first repeat unit based on either ref or alt, whichever is longer
if (altBases.length > refBases.length)
longB = altBases;
else
longB = refBases;
// see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units
// for example, -*,CACA needs to first be decomposed into (CA)2
final int repeatUnitLength = findRepeatedSubstring(longB);
final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength);
final int[] repetitionCount = new int[2];
// look for repetitions forward on the ref bases (i.e. starting at beginning of ref bases)
int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases, true);
repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext), true)-repetitionsInRef;
repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext), true)-repetitionsInRef;
return new Pair<int[], byte[]>(repetitionCount, repeatUnit);
}
/**
* Find out if a string can be represented as a tandem number of substrings.
* For example ACTACT is a 2-tandem of ACT,
* but ACTACA is not.
*
* @param bases String to be tested
* @return Length of repeat unit, if string can be represented as tandem of substring (if it can't
* be represented as one, it will be just the length of the input string)
*/
public static int findRepeatedSubstring(byte[] bases) {
int repLength;
for (repLength=1; repLength <=bases.length; repLength++) {
final byte[] candidateRepeatUnit = Arrays.copyOf(bases,repLength);
boolean allBasesMatch = true;
for (int start = repLength; start < bases.length; start += repLength ) {
// check that remaining of string is exactly equal to repeat unit
final byte[] basePiece = Arrays.copyOfRange(bases,start,start+candidateRepeatUnit.length);
if (!Arrays.equals(candidateRepeatUnit, basePiece)) {
allBasesMatch = false;
break;
}
}
if (allBasesMatch)
return repLength;
}
return repLength;
}
/**
* Helper routine that finds number of repetitions a string consists of.
* For example, for string ATAT and repeat unit AT, number of repetitions = 2
* @param repeatUnit Substring
* @param testString String to test
* @oaram lookForward Look for repetitions forward (at beginning of string) or backward (at end of string)
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
*/
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString, boolean lookForward) {
int numRepeats = 0;
if (lookForward) {
// look forward on the test string
for (int start = 0; start < testString.length; start += repeatUnit.length) {
int end = start + repeatUnit.length;
byte[] unit = Arrays.copyOfRange(testString,start, end);
if(Arrays.equals(unit,repeatUnit))
numRepeats++;
else
break;
}
return numRepeats;
}
// look backward. For example, if repeatUnit = AT and testString = GATAT, number of repeat units is still 2
// look forward on the test string
for (int start = testString.length - repeatUnit.length; start >= 0; start -= repeatUnit.length) {
int end = start + repeatUnit.length;
byte[] unit = Arrays.copyOfRange(testString,start, end);
if(Arrays.equals(unit,repeatUnit))
numRepeats++;
else
break;
}
return numRepeats;
}
/**
* Helper function for isTandemRepeat that checks that allele matches somewhere on the reference
* @param ref
* @param alt
* @param refBasesStartingAtVCWithoutPad
* @return
*/
protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) {
if ( ! Allele.oneIsPrefixOfOther(ref, alt) )
return false; // we require one allele be a prefix of another
if ( ref.length() > alt.length() ) { // we are a deletion
return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2);
} else { // we are an insertion
return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1);
}
}
protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) {
final String potentialRepeat = l.substring(s.length()); // skip s bases
for ( int i = 0; i < minNumberOfMatches; i++) {
final int start = i * potentialRepeat.length();
final int end = (i+1) * potentialRepeat.length();
if ( ref.length() < end )
return false; // we ran out of bases to test
final String refSub = ref.substring(start, end);
if ( ! refSub.equals(potentialRepeat) )
return false; // repeat didn't match, fail
}
return true; // we passed all tests, we matched
}
/**
* subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
*
* @param vc variant context with genotype likelihoods
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
* @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
* @return genotypes
*/
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
final List<Allele> allelesToUse,
final boolean assignGenotypes) {
// the genotypes with PLs
final GenotypesContext oldGTs = vc.getGenotypes();
// samples
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
// the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create();
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final int numNewAltAlleles = allelesToUse.size() - 1;
// which PLs should be carried forward?
ArrayList<Integer> likelihoodIndexesToUse = null;
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
// then we can keep the PLs as is; otherwise, we determine which ones to keep
if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
likelihoodIndexesToUse = new ArrayList<Integer>(30);
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
altAlleleIndexToUse[i] = true;
}
// numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
// consider this entry only if both of the alleles are good
if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
likelihoodIndexesToUse.add(PLindex);
}
}
// create the new genotypes
for ( int k = 0; k < oldGTs.size(); k++ ) {
final Genotype g = oldGTs.get(sampleIndices.get(k));
if ( !g.hasLikelihoods() ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
continue;
}
// create the new likelihoods array from the alleles we are allowed to use
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
double[] newLikelihoods;
if ( likelihoodIndexesToUse == null ) {
newLikelihoods = originalLikelihoods;
} else {
newLikelihoods = new double[likelihoodIndexesToUse.size()];
int newIndex = 0;
for ( int oldIndex : likelihoodIndexesToUse )
newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
}
// if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
}
else {
final GenotypeBuilder gb = new GenotypeBuilder(g);
if ( numNewAltAlleles == 0 )
gb.noPL();
else
gb.PL(newLikelihoods);
// if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
gb.alleles(NO_CALL_ALLELES);
}
else {
// find the genotype with maximum likelihoods
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
}
newGTs.add(gb.make());
}
}
return newGTs;
}
/**
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
*
* @param vc variant context with genotype likelihoods
* @return genotypes context
*/
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
return subsetDiploidAlleles(vc, vc.getAlleles(), true);
}
/**
* Split variant context into its biallelic components if there are more than 2 alleles
*
* For VC has A/B/C alleles, returns A/B and A/C contexts.
* Genotypes are all no-calls now (it's not possible to fix them easily)
* Alleles are right trimmed to satisfy VCF conventions
*
* If vc is biallelic or non-variant it is just returned
*
* Chromosome counts are updated (but they are by definition 0)
*
* @param vc a potentially multi-allelic variant context
* @return a list of bi-allelic (or monomorphic) variant context
*/
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
if ( ! vc.isVariant() || vc.isBiallelic() )
// non variant or biallelics already satisfy the contract
return Collections.singletonList(vc);
else {
final List<VariantContext> biallelics = new LinkedList<VariantContext>();
for ( final Allele alt : vc.getAlternateAlleles() ) {
VariantContextBuilder builder = new VariantContextBuilder(vc);
final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
builder.alleles(alleles);
builder.genotypes(subsetDiploidAlleles(vc, alleles, false));
VariantContextUtils.calculateChromosomeCounts(builder, true);
biallelics.add(reverseTrimAlleles(builder.make()));
}
return biallelics;
}
}
public static Genotype removePLsAndAD(final Genotype g) {
return ( g.hasLikelihoods() || g.hasAD() ) ? new GenotypeBuilder(g).noPL().noAD().make() : g;
}
/**
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
* If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
* the sample name
*
* @param unsortedVCs collection of unsorted VCs
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
* @param filteredRecordMergeType merge type for filtered records
* @param genotypeMergeOptions merge option for genotypes
* @param annotateOrigin should we annotate the set it came from?
* @param printMessages should we print messages?
* @param setKey the key name of the set
* @param filteredAreUncalled are filtered records uncalled?
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
* @return new VariantContext representing the merge of unsortedVCs
*/
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
final List<String> priorityListOfVCs,
final FilteredRecordMergeType filteredRecordMergeType,
final GenotypeMergeType genotypeMergeOptions,
final boolean annotateOrigin,
final boolean printMessages,
final String setKey,
final boolean filteredAreUncalled,
final boolean mergeInfoWithMaxAC ) {
int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size();
return simpleMerge(unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC);
}
/**
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
* If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
* the sample name.
* simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use
* SampleUtils.verifyUniqueSamplesNames to check that before using sempleMerge.
*
* @param unsortedVCs collection of unsorted VCs
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
* @param filteredRecordMergeType merge type for filtered records
* @param genotypeMergeOptions merge option for genotypes
* @param annotateOrigin should we annotate the set it came from?
* @param printMessages should we print messages?
* @param setKey the key name of the set
* @param filteredAreUncalled are filtered records uncalled?
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
* @return new VariantContext representing the merge of unsortedVCs
*/
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
final List<String> priorityListOfVCs,
final int originalNumOfVCs,
final FilteredRecordMergeType filteredRecordMergeType,
final GenotypeMergeType genotypeMergeOptions,
final boolean annotateOrigin,
final boolean printMessages,
final String setKey,
final boolean filteredAreUncalled,
final boolean mergeInfoWithMaxAC ) {
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
return null;
if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size())
throw new IllegalArgumentException("the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list");
if ( annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0)
throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts");
final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
// Make sure all variant contexts are padded with reference base in case of indels if necessary
final List<VariantContext> VCs = new ArrayList<VariantContext>();
for (final VariantContext vc : preFilteredVCs) {
if ( ! filteredAreUncalled || vc.isNotFiltered() )
VCs.add(vc);
}
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
return null;
// establish the baseline info from the first VC
final VariantContext first = VCs.get(0);
final String name = first.getSource();
final Allele refAllele = determineReferenceAllele(VCs);
final Set<Allele> alleles = new LinkedHashSet<Allele>();
final Set<String> filters = new HashSet<String>();
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
final Set<String> inconsistentAttributes = new HashSet<String>();
final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id
VariantContext longestVC = first;
int depth = 0;
int maxAC = -1;
final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<String, Object>();
double log10PError = CommonInfo.NO_LOG10_PERROR;
VariantContext vcWithMaxAC = null;
GenotypesContext genotypes = GenotypesContext.create();
// counting the number of filtered and variant VCs
int nFiltered = 0;
boolean remapped = false;
// cycle through and add info from the other VCs, making sure the loc/reference matches
for ( final VariantContext vc : VCs ) {
if ( longestVC.getStart() != vc.getStart() )
throw new IllegalStateException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
if ( VariantContextUtils.getSize(vc) > VariantContextUtils.getSize(longestVC) )
longestVC = vc; // get the longest location
nFiltered += vc.isFiltered() ? 1 : 0;
if ( vc.isVariant() ) variantSources.add(vc.getSource());
AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
remapped = remapped || alleleMapping.needsRemapping();
alleles.addAll(alleleMapping.values());
mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);
// We always take the QUAL of the first VC with a non-MISSING qual for the combined value
if ( log10PError == CommonInfo.NO_LOG10_PERROR )
log10PError = vc.getLog10PError();
filters.addAll(vc.getFilters());
//
// add attributes
//
// special case DP (add it up) and ID (just preserve it)
//
if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
if ( vc.hasID() ) rsIDs.add(vc.getID());
if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
// lets see if the string contains a , separator
if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
for (String alleleCount : alleleCountArray) {
final int ac = Integer.valueOf(alleleCount.trim());
if (ac > maxAC) {
maxAC = ac;
vcWithMaxAC = vc;
}
}
} else {
final int ac = Integer.valueOf(rawAlleleCounts);
if (ac > maxAC) {
maxAC = ac;
vcWithMaxAC = vc;
}
}
}
for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
String key = p.getKey();
// if we don't like the key already, don't go anywhere
if ( ! inconsistentAttributes.contains(key) ) {
final boolean alreadyFound = attributes.containsKey(key);
final Object boundValue = attributes.get(key);
final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);
if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) {
// we found the value but we're inconsistent, put it in the exclude list
//System.out.printf("Inconsistent INFO values: %s => %s and %s%n", key, boundValue, p.getValue());
inconsistentAttributes.add(key);
attributes.remove(key);
} else if ( ! alreadyFound || boundIsMissingValue ) { // no value
//if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(), p.getValue());
attributes.put(key, p.getValue());
}
}
}
}
// if we have more alternate alleles in the merged VC than in one or more of the
// original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well as allele-dependent attributes like AC,AF, and AD
for ( final VariantContext vc : VCs ) {
if (vc.getAlleles().size() == 1)
continue;
if ( hasPLIncompatibleAlleles(alleles, vc.getAlleles())) {
if ( ! genotypes.isEmpty() ) {
logger.debug(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s",
vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.getAlleles()));
}
genotypes = stripPLsAndAD(genotypes);
// this will remove stale AC,AF attributed from vc
VariantContextUtils.calculateChromosomeCounts(vc, attributes, true);
break;
}
}
// take the VC with the maxAC and pull the attributes into a modifiable map
if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
}
// if at least one record was unfiltered and we want a union, clear all of the filters
if ( (filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL )
filters.clear();
if ( annotateOrigin ) { // we care about where the call came from
String setValue;
if ( nFiltered == 0 && variantSources.size() == originalNumOfVCs ) // nothing was unfiltered
setValue = MERGE_INTERSECTION;
else if ( nFiltered == VCs.size() ) // everything was filtered out
setValue = MERGE_FILTER_IN_ALL;
else if ( variantSources.isEmpty() ) // everyone was reference
setValue = MERGE_REF_IN_ALL;
else {
final LinkedHashSet<String> s = new LinkedHashSet<String>();
for ( final VariantContext vc : VCs )
if ( vc.isVariant() )
s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
setValue = Utils.join("-", s);
}
if ( setKey != null ) {
attributes.put(setKey, setValue);
if( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
attributesWithMaxAC.put(setKey, setValue);
}
}
}
if ( depth > 0 )
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs);
final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID);
builder.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd());
builder.alleles(alleles);
builder.genotypes(genotypes);
builder.log10PError(log10PError);
builder.filters(filters.isEmpty() ? filters : new TreeSet<String>(filters));
builder.attributes(new TreeMap<String, Object>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));
// Trim the padded bases of all alleles if necessary
final VariantContext merged = builder.make();
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
return merged;
}
private static final boolean hasPLIncompatibleAlleles(final Collection<Allele> alleleSet1, final Collection<Allele> alleleSet2) {
final Iterator<Allele> it1 = alleleSet1.iterator();
final Iterator<Allele> it2 = alleleSet2.iterator();
while ( it1.hasNext() && it2.hasNext() ) {
final Allele a1 = it1.next();
final Allele a2 = it2.next();
if ( ! a1.equals(a2) )
return true;
}
// by this point, at least one of the iterators is empty. All of the elements
// we've compared are equal up until this point. But it's possible that the
// sets aren't the same size, which is indicated by the test below. If they
// are of the same size, though, the sets are compatible
return it1.hasNext() || it2.hasNext();
}
public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
for ( final Genotype g : genotypes ) {
newGs.add(removePLsAndAD(g));
}
return newGs;
}
static private Allele determineReferenceAllele(List<VariantContext> VCs) {
Allele ref = null;
for ( VariantContext vc : VCs ) {
Allele myRef = vc.getReference();
if ( ref == null || ref.length() < myRef.length() )
ref = myRef;
else if ( ref.length() == myRef.length() && ! ref.equals(myRef) )
throw new TribbleException(String.format("The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s", vc.getChr(), vc.getStart(), ref, myRef));
}
return ref;
}
static private AlleleMapper resolveIncompatibleAlleles(Allele refAllele, VariantContext vc, Set<Allele> allAlleles) {
if ( refAllele.equals(vc.getReference()) )
return new AlleleMapper(vc);
else {
// we really need to do some work. The refAllele is the longest reference allele seen at this
// start site. So imagine it is:
//
// refAllele: ACGTGA
// myRef: ACGT
// myAlt: A
//
// We need to remap all of the alleles in vc to include the extra GA so that
// myRef => refAllele and myAlt => AGA
//
Allele myRef = vc.getReference();
if ( refAllele.length() <= myRef.length() ) throw new IllegalStateException("BUG: myRef="+myRef+" is longer than refAllele="+refAllele);
byte[] extraBases = Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length());
// System.out.printf("Remapping allele at %s%n", vc);
// System.out.printf("ref %s%n", refAllele);
// System.out.printf("myref %s%n", myRef );
// System.out.printf("extrabases %s%n", new String(extraBases));
Map<Allele, Allele> map = new HashMap<Allele, Allele>();
for ( Allele a : vc.getAlleles() ) {
if ( a.isReference() )
map.put(a, refAllele);
else {
Allele extended = Allele.extend(a, extraBases);
for ( Allele b : allAlleles )
if ( extended.equals(b) )
extended = b;
// System.out.printf(" Extending %s => %s%n", a, extended);
map.put(a, extended);
}
}
// debugging
// System.out.printf("mapping %s%n", map);
return new AlleleMapper(map);
}
}
public static List<VariantContext> sortVariantContextsByPriority(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs, GenotypeMergeType mergeOption ) {
if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null )
throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");
if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED )
return new ArrayList<VariantContext>(unsortedVCs);
else {
ArrayList<VariantContext> sorted = new ArrayList<VariantContext>(unsortedVCs);
Collections.sort(sorted, new CompareByPriority(priorityListOfVCs));
return sorted;
}
}
private static void mergeGenotypes(GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) {
//TODO: should we add a check for cases when the genotypeMergeOption is REQUIRE_UNIQUE
for ( Genotype g : oneVC.getGenotypes() ) {
String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
if ( ! mergedGenotypes.containsSample(name) ) {
// only add if the name is new
Genotype newG = g;
if ( uniqifySamples || alleleMapping.needsRemapping() ) {
final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
newG = new GenotypeBuilder(g).name(name).alleles(alleles).make();
}
mergedGenotypes.add(newG);
}
}
}
public static String mergedSampleName(String trackName, String sampleName, boolean uniqify ) {
return uniqify ? sampleName + "." + trackName : sampleName;
}
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
// see whether we need to trim common reference base from all alleles
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
return inputVC;
final List<Allele> alleles = new ArrayList<Allele>();
final GenotypesContext genotypes = GenotypesContext.create();
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
for (final Allele a : inputVC.getAlleles()) {
if (a.isSymbolic()) {
alleles.add(a);
originalToTrimmedAlleleMap.put(a, a);
} else {
// get bases for current allele and create a new one with trimmed bases
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
alleles.add(trimmedAllele);
originalToTrimmedAlleleMap.put(a, trimmedAllele);
}
}
// now we can recreate new genotypes with trimmed alleles
for ( final Genotype genotype : inputVC.getGenotypes() ) {
final List<Allele> originalAlleles = genotype.getAlleles();
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
for ( final Allele a : originalAlleles ) {
if ( a.isCalled() )
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
else
trimmedAlleles.add(Allele.NO_CALL);
}
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
}
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
}
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
final byte[] ref,
final int forwardClipping,
final boolean allowFullClip) {
int clipping = 0;
boolean stillClipping = true;
while ( stillClipping ) {
for ( final Allele a : unclippedAlleles ) {
if ( a.isSymbolic() )
continue;
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
if ( a.length() - clipping == 0 )
return clipping - (allowFullClip ? 0 : 1);
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
stillClipping = false;
}
else if ( ref.length == clipping ) {
if ( allowFullClip )
stillClipping = false;
else
return -1;
}
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
stillClipping = false;
}
}
if ( stillClipping )
clipping++;
}
return clipping;
}
public static double computeHardyWeinbergPvalue(VariantContext vc) {
if ( vc.getCalledChrCount() == 0 )
return 0.0;
return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount());
}
public static boolean requiresPaddingBase(final List<String> alleles) {
// see whether one of the alleles would be null if trimmed through
for ( final String allele : alleles ) {
if ( allele.isEmpty() )
return true;
}
int clipping = 0;
Character currentBase = null;
while ( true ) {
for ( final String allele : alleles ) {
if ( allele.length() - clipping == 0 )
return true;
char myBase = allele.charAt(clipping);
if ( currentBase == null )
currentBase = myBase;
else if ( currentBase != myBase )
return false;
}
clipping++;
currentBase = null;
}
}
private static class AlleleMapper {
private VariantContext vc = null;
private Map<Allele, Allele> map = null;
public AlleleMapper(VariantContext vc) { this.vc = vc; }
public AlleleMapper(Map<Allele, Allele> map) { this.map = map; }
public boolean needsRemapping() { return this.map != null; }
public Collection<Allele> values() { return map != null ? map.values() : vc.getAlleles(); }
public Allele remap(Allele a) { return map != null && map.containsKey(a) ? map.get(a) : a; }
public List<Allele> remap(List<Allele> as) {
List<Allele> newAs = new ArrayList<Allele>();
for ( Allele a : as ) {
//System.out.printf(" Remapping %s => %s%n", a, remap(a));
newAs.add(remap(a));
}
return newAs;
}
}
private static class CompareByPriority implements Comparator<VariantContext>, Serializable {
List<String> priorityListOfVCs;
public CompareByPriority(List<String> priorityListOfVCs) {
this.priorityListOfVCs = priorityListOfVCs;
}
private int getIndex(VariantContext vc) {
int i = priorityListOfVCs.indexOf(vc.getSource());
if ( i == -1 ) throw new IllegalArgumentException("Priority list " + priorityListOfVCs + " doesn't contain variant context " + vc.getSource());
return i;
}
public int compare(VariantContext vc1, VariantContext vc2) {
return Integer.valueOf(getIndex(vc1)).compareTo(getIndex(vc2));
}
}
} }

View File

@ -26,8 +26,6 @@
package org.broadinstitute.variant.utils; package org.broadinstitute.variant.utils;
import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Random; import java.util.Random;
@ -176,7 +174,7 @@ public class BaseUtils {
if ( baseIndex == Base.N.ordinal() ) { if ( baseIndex == Base.N.ordinal() ) {
bases[i] = 'N'; bases[i] = 'N';
} else if ( errorOnBadReferenceBase && baseIndex == -1 ) { } else if ( errorOnBadReferenceBase && baseIndex == -1 ) {
throw new UserException.BadInput("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'"); throw new IllegalStateException("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'");
} }
} }
return bases; return bases;
@ -517,7 +515,7 @@ public class BaseUtils {
case 'N': case 'N':
return 'N'; return 'N';
default: default:
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base."); throw new IllegalArgumentException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
} }
} }
} }

View File

@ -141,13 +141,6 @@ public class GeneralUtils {
return normalized; return normalized;
} }
public static double sum(double[] values) {
double s = 0.0;
for (double v : values)
s += v;
return s;
}
public static double arrayMax(final double[] array) { public static double arrayMax(final double[] array) {
return array[maxElementIndex(array, array.length)]; return array[maxElementIndex(array, array.length)];
} }

View File

@ -36,7 +36,7 @@ import java.util.*;
* *
* @author depristo * @author depristo
*/ */
final class CommonInfo { public final class CommonInfo {
public static final double NO_LOG10_PERROR = 1.0; public static final double NO_LOG10_PERROR = 1.0;
private static Set<String> NO_FILTERS = Collections.emptySet(); private static Set<String> NO_FILTERS = Collections.emptySet();

View File

@ -28,15 +28,9 @@ package org.broadinstitute.variant.vcf;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.FilenameUtils;
import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.variant.utils.GeneralUtils; import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.io.File; import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.*; import java.util.*;
public class VCFUtils { public class VCFUtils {
@ -106,21 +100,6 @@ public class VCFUtils {
return new HashSet<VCFHeaderLine>(map.values()); return new HashSet<VCFHeaderLine>(map.values());
} }
public static String rsIDOfFirstRealVariant(List<VariantContext> VCs, VariantContext.Type type) {
if ( VCs == null )
return null;
String rsID = null;
for ( VariantContext vc : VCs ) {
if ( vc.getType() == type ) {
rsID = vc.getID();
break;
}
}
return rsID;
}
/** /**
* Add / replace the contig header lines in the VCFHeader with the in the reference file and master reference dictionary * Add / replace the contig header lines in the VCFHeader with the in the reference file and master reference dictionary
* *
@ -198,35 +177,6 @@ public class VCFUtils {
return assembly; return assembly;
} }
/**
* Read all of the VCF records from source into memory, returning the header and the VariantContexts
*
* @param source the file to read, must be in VCF4 format
* @return
* @throws java.io.IOException
*/
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
// read in the features
final List<VariantContext> vcs = new ArrayList<VariantContext>();
final VCFCodec codec = new VCFCodec();
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
FeatureCodecHeader header = codec.readHeader(pbs);
pbs.close();
pbs = new PositionalBufferedStream(new FileInputStream(source));
pbs.skip(header.getHeaderEnd());
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
while ( ! pbs.isDone() ) {
final VariantContext vc = codec.decode(pbs);
if ( vc != null )
vcs.add(vc);
}
return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
}
/** Only displays a warning if warnings are enabled and an identical warning hasn't been already issued */ /** Only displays a warning if warnings are enabled and an identical warning hasn't been already issued */
private static final class HeaderConflictWarner { private static final class HeaderConflictWarner {
boolean emitWarnings; boolean emitWarnings;

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.bcf2.BCF2Utils; import org.broadinstitute.variant.bcf2.BCF2Utils;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.variant.vcf.VCFCodec;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;

View File

@ -26,7 +26,7 @@
package org.broadinstitute.sting.utils; package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.variant.utils.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;

View File

@ -36,11 +36,9 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextTestProvider; import org.broadinstitute.variant.variantcontext.VariantContextTestProvider;
import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.variant.vcf.VCFCodec;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
@ -252,13 +250,13 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
final File file = new File(path); final File file = new File(path);
final VCFCodec codec = new VCFCodec(); final VCFCodec codec = new VCFCodec();
final Pair<VCFHeader, Iterable<VariantContext>> reader = VariantContextTestProvider.readAllVCs(file, codec); final VariantContextTestProvider.VariantContextContainer reader = VariantContextTestProvider.readAllVCs(file, codec);
final List<ActiveRegion> incRegions = new ArrayList<ActiveRegion>(); final List<ActiveRegion> incRegions = new ArrayList<ActiveRegion>();
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser); final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser);
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser); final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser);
int pos = start; int pos = start;
for ( final VariantContext vc : reader.getSecond() ) { for ( final VariantContext vc : reader.getVCs() ) {
if ( vc == null ) continue; if ( vc == null ) continue;
while ( pos < vc.getStart() ) { while ( pos < vc.getStart() ) {
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, pos); final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, pos);

View File

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2012 The Broad Institute * Copyright (c) 2012 The Broad Institute
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without * files (the "Software"), to deal in the Software without
@ -9,10 +9,10 @@
* copies of the Software, and to permit persons to whom the * copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following * Software is furnished to do so, subject to the following
* conditions: * conditions:
* *
* The above copyright notice and this permission notice shall be * The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software. * included in all copies or substantial portions of the Software.
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
@ -23,33 +23,25 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE. * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.variant.variantcontext; package org.broadinstitute.sting.utils.variant;
import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.variant.VariantBaseTest; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.utils.GeneralUtils; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.*;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeSuite; import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
public class VariantContextUtilsUnitTest extends VariantBaseTest { public class GATKVariantContextUtilsUnitTest extends BaseTest {
Allele Aref, T, C, G, Cref, ATC, ATCATC; Allele Aref, T, C, G, Cref, ATC, ATCATC;
@BeforeSuite @BeforeSuite
public void setup() { public void setup() {
final File referenceFile = new File(b37KGReference);
try {
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile);
}
catch(FileNotFoundException ex) {
throw new RuntimeException(referenceFile.getAbsolutePath(),ex);
}
// alleles // alleles
Aref = Allele.create("A", true); Aref = Allele.create("A", true);
Cref = Allele.create("C", true); Cref = Allele.create("C", true);
@ -186,10 +178,10 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
final List<String> priority = vcs2priority(inputs); final List<String> priority = vcs2priority(inputs);
final VariantContext merged = VariantContextUtils.simpleMerge( final VariantContext merged = GATKVariantContextUtils.simpleMerge(
inputs, priority, inputs, priority,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false); GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false);
Assert.assertEquals(merged.getAlleles(), cfg.expected); Assert.assertEquals(merged.getAlleles(), cfg.expected);
} }
@ -244,10 +236,10 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
inputs.add(new VariantContextBuilder(snpVC1).id(id).make()); inputs.add(new VariantContextBuilder(snpVC1).id(id).make());
} }
final VariantContext merged = VariantContextUtils.simpleMerge( final VariantContext merged = GATKVariantContextUtils.simpleMerge(
inputs, null, inputs, null,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false); GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false);
Assert.assertEquals(merged.getID(), cfg.expected); Assert.assertEquals(merged.getID(), cfg.expected);
} }
@ -261,14 +253,14 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
List<VariantContext> inputs; List<VariantContext> inputs;
VariantContext expected; VariantContext expected;
String setExpected; String setExpected;
VariantContextUtils.FilteredRecordMergeType type; GATKVariantContextUtils.FilteredRecordMergeType type;
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) { private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) {
this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected); this(name, input1, input2, expected, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected);
} }
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) { private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, GATKVariantContextUtils.FilteredRecordMergeType type, String setExpected) {
super(MergeFilteredTest.class, name); super(MergeFilteredTest.class, name);
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(input1, input2)); LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(input1, input2));
this.expected = expected; this.expected = expected;
@ -288,66 +280,66 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION); GATKVariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("noFilters", new MergeFilteredTest("noFilters",
makeVC("1", Arrays.asList(Aref, T), "."), makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "."), makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."), makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.MERGE_INTERSECTION); GATKVariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("oneFiltered", new MergeFilteredTest("oneFiltered",
makeVC("1", Arrays.asList(Aref, T), "."), makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."), makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); String.format("1-%s2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("onePassOneFail", new MergeFilteredTest("onePassOneFail",
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); String.format("1-%s2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("AllFiltered", new MergeFilteredTest("AllFiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "FAIL"), makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.MERGE_FILTER_IN_ALL); GATKVariantContextUtils.MERGE_FILTER_IN_ALL);
// test ALL vs. ANY // test ALL vs. ANY
new MergeFilteredTest("FailOneUnfiltered", new MergeFilteredTest("FailOneUnfiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."), makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."), makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); String.format("%s1-2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("OneFailAllUnfilteredArg", new MergeFilteredTest("OneFailAllUnfilteredArg",
makeVC("1", Arrays.asList(Aref, T), "FAIL"), makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."), makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "FAIL"), makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX)); String.format("%s1-2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
// test excluding allele in filtered record // test excluding allele in filtered record
new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords", new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords",
makeVC("1", Arrays.asList(Aref, T), "."), makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"), makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."), makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX)); String.format("1-%s2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
// promotion of site from unfiltered to PASSES // promotion of site from unfiltered to PASSES
new MergeFilteredTest("UnfilteredPlusPassIsPass", new MergeFilteredTest("UnfilteredPlusPassIsPass",
makeVC("1", Arrays.asList(Aref, T), "."), makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS), makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION); GATKVariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("RefInAll", new MergeFilteredTest("RefInAll",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_REF_IN_ALL); GATKVariantContextUtils.MERGE_REF_IN_ALL);
new MergeFilteredTest("RefInOne", new MergeFilteredTest("RefInOne",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS), makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
@ -361,8 +353,8 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
@Test(dataProvider = "mergeFiltered") @Test(dataProvider = "mergeFiltered")
public void testMergeFiltered(MergeFilteredTest cfg) { public void testMergeFiltered(MergeFilteredTest cfg) {
final List<String> priority = vcs2priority(cfg.inputs); final List<String> priority = vcs2priority(cfg.inputs);
final VariantContext merged = VariantContextUtils.simpleMerge( final VariantContext merged = GATKVariantContextUtils.simpleMerge(
cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); cfg.inputs, priority, cfg.type, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
// test alleles are equal // test alleles are equal
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
@ -487,9 +479,9 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
@Test(dataProvider = "mergeGenotypes") @Test(dataProvider = "mergeGenotypes")
public void testMergeGenotypes(MergeGenotypesTest cfg) { public void testMergeGenotypes(MergeGenotypesTest cfg) {
final VariantContext merged = VariantContextUtils.simpleMerge( final VariantContext merged = GATKVariantContextUtils.simpleMerge(
cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, cfg.inputs, cfg.priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
// test alleles are equal // test alleles are equal
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles()); Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
@ -528,9 +520,9 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, -1)); final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, -1));
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2)); final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2));
final VariantContext merged = VariantContextUtils.simpleMerge( final VariantContext merged = GATKVariantContextUtils.simpleMerge(
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, Arrays.asList(vc1, vc2), null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false); GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
// test genotypes // test genotypes
Assert.assertEquals(merged.getSampleNames(), new HashSet<String>(Arrays.asList("s1.1", "s1.2"))); Assert.assertEquals(merged.getSampleNames(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
@ -561,12 +553,12 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS); VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
final VariantContext merged = VariantContextUtils.simpleMerge( final VariantContext merged = GATKVariantContextUtils.simpleMerge(
Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, Arrays.asList(vc1, vc2), priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false); GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false);
if ( annotate ) if ( annotate )
Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION); Assert.assertEquals(merged.getAttribute(set), GATKVariantContextUtils.MERGE_INTERSECTION);
else else
Assert.assertFalse(merged.hasAttribute(set)); Assert.assertFalse(merged.hasAttribute(set));
} }
@ -583,78 +575,6 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
return priority; return priority;
} }
// --------------------------------------------------------------------------------
//
// Test repeats
//
// --------------------------------------------------------------------------------
private class RepeatDetectorTest extends TestDataProvider {
String ref;
boolean isTrueRepeat;
VariantContext vc;
private RepeatDetectorTest(boolean isTrueRepeat, String ref, String refAlleleString, String ... altAlleleStrings) {
super(RepeatDetectorTest.class);
this.isTrueRepeat = isTrueRepeat;
this.ref = ref;
List<Allele> alleles = new LinkedList<Allele>();
final Allele refAllele = Allele.create(refAlleleString, true);
alleles.add(refAllele);
for ( final String altString: altAlleleStrings) {
final Allele alt = Allele.create(altString, false);
alleles.add(alt);
}
VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, refAllele.length(), alleles);
this.vc = builder.make();
}
public String toString() {
return String.format("%s refBases=%s trueRepeat=%b vc=%s", super.toString(), ref, isTrueRepeat, vc);
}
}
@DataProvider(name = "RepeatDetectorTest")
public Object[][] makeRepeatDetectorTest() {
new RepeatDetectorTest(true, "NAAC", "N", "NA");
new RepeatDetectorTest(true, "NAAC", "NA", "N");
new RepeatDetectorTest(false, "NAAC", "NAA", "N");
new RepeatDetectorTest(false, "NAAC", "N", "NC");
new RepeatDetectorTest(false, "AAC", "A", "C");
// running out of ref bases => false
new RepeatDetectorTest(false, "NAAC", "N", "NCAGTA");
// complex repeats
new RepeatDetectorTest(true, "NATATATC", "N", "NAT");
new RepeatDetectorTest(true, "NATATATC", "N", "NATA");
new RepeatDetectorTest(true, "NATATATC", "N", "NATAT");
new RepeatDetectorTest(true, "NATATATC", "NAT", "N");
new RepeatDetectorTest(false, "NATATATC", "NATA", "N");
new RepeatDetectorTest(false, "NATATATC", "NATAT", "N");
// multi-allelic
new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATAT");
new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATA");
new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATAT");
new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATA"); // two As
new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NATC"); // false
new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NCC"); // false
new RepeatDetectorTest(false, "NATATATC", "NAT", "NATAT", "NCC"); // false
return RepeatDetectorTest.getTests(RepeatDetectorTest.class);
}
@Test(dataProvider = "RepeatDetectorTest")
public void testRepeatDetectorTest(RepeatDetectorTest cfg) {
// test alleles are equal
Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
}
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
// //
// basic allele clipping test // basic allele clipping test
@ -698,10 +618,11 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
@Test(dataProvider = "ReverseClippingPositionTestProvider") @Test(dataProvider = "ReverseClippingPositionTestProvider")
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
Assert.assertEquals(result, cfg.expectedClip); Assert.assertEquals(result, cfg.expectedClip);
} }
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
// //
// test splitting into bi-allelics // test splitting into bi-allelics
@ -776,7 +697,7 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
@Test(dataProvider = "SplitBiallelics") @Test(dataProvider = "SplitBiallelics")
public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) { public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc); final List<VariantContext> biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vc);
Assert.assertEquals(biallelics.size(), expectedBiallelics.size()); Assert.assertEquals(biallelics.size(), expectedBiallelics.size());
for ( int i = 0; i < biallelics.size(); i++ ) { for ( int i = 0; i < biallelics.size(); i++ ) {
final VariantContext actual = biallelics.get(i); final VariantContext actual = biallelics.get(i);
@ -790,14 +711,14 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
final List<Genotype> genotypes = new ArrayList<Genotype>(); final List<Genotype> genotypes = new ArrayList<Genotype>();
int sampleI = 0; int sampleI = 0;
for ( final List<Allele> alleles : GeneralUtils.makePermutations(vc.getAlleles(), 2, true) ) { for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles)); genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
} }
genotypes.add(GenotypeBuilder.createMissing("missing", 2)); genotypes.add(GenotypeBuilder.createMissing("missing", 2));
final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make(); final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make();
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes); final List<VariantContext> biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
for ( int i = 0; i < biallelics.size(); i++ ) { for ( int i = 0; i < biallelics.size(); i++ ) {
final VariantContext actual = biallelics.get(i); final VariantContext actual = biallelics.get(i);
Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples
@ -812,4 +733,159 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
} }
} }
} }
}
// --------------------------------------------------------------------------------
//
// Test repeats
//
// --------------------------------------------------------------------------------
private class RepeatDetectorTest extends TestDataProvider {
String ref;
boolean isTrueRepeat;
VariantContext vc;
private RepeatDetectorTest(boolean isTrueRepeat, String ref, String refAlleleString, String ... altAlleleStrings) {
super(RepeatDetectorTest.class);
this.isTrueRepeat = isTrueRepeat;
this.ref = ref;
List<Allele> alleles = new LinkedList<Allele>();
final Allele refAllele = Allele.create(refAlleleString, true);
alleles.add(refAllele);
for ( final String altString: altAlleleStrings) {
final Allele alt = Allele.create(altString, false);
alleles.add(alt);
}
VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, refAllele.length(), alleles);
this.vc = builder.make();
}
public String toString() {
return String.format("%s refBases=%s trueRepeat=%b vc=%s", super.toString(), ref, isTrueRepeat, vc);
}
}
@DataProvider(name = "RepeatDetectorTest")
public Object[][] makeRepeatDetectorTest() {
new RepeatDetectorTest(true, "NAAC", "N", "NA");
new RepeatDetectorTest(true, "NAAC", "NA", "N");
new RepeatDetectorTest(false, "NAAC", "NAA", "N");
new RepeatDetectorTest(false, "NAAC", "N", "NC");
new RepeatDetectorTest(false, "AAC", "A", "C");
// running out of ref bases => false
new RepeatDetectorTest(false, "NAAC", "N", "NCAGTA");
// complex repeats
new RepeatDetectorTest(true, "NATATATC", "N", "NAT");
new RepeatDetectorTest(true, "NATATATC", "N", "NATA");
new RepeatDetectorTest(true, "NATATATC", "N", "NATAT");
new RepeatDetectorTest(true, "NATATATC", "NAT", "N");
new RepeatDetectorTest(false, "NATATATC", "NATA", "N");
new RepeatDetectorTest(false, "NATATATC", "NATAT", "N");
// multi-allelic
new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATAT");
new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATA");
new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATAT");
new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATA"); // two As
new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NATC"); // false
new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NCC"); // false
new RepeatDetectorTest(false, "NATATATC", "NAT", "NATAT", "NCC"); // false
return RepeatDetectorTest.getTests(RepeatDetectorTest.class);
}
@Test(dataProvider = "RepeatDetectorTest")
public void testRepeatDetectorTest(RepeatDetectorTest cfg) {
// test alleles are equal
Assert.assertEquals(GATKVariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
}
@Test
public void testRepeatAllele() {
Allele nullR = Allele.create("A", true);
Allele nullA = Allele.create("A", false);
Allele atc = Allele.create("AATC", false);
Allele atcatc = Allele.create("AATCATC", false);
Allele ccccR = Allele.create("ACCCC", true);
Allele cc = Allele.create("ACC", false);
Allele cccccc = Allele.create("ACCCCCC", false);
Allele gagaR = Allele.create("AGAGA", true);
Allele gagagaga = Allele.create("AGAGAGAGA", false);
// - / ATC [ref] from 20-22
String delLoc = "chr1";
int delLocStart = 20;
int delLocStop = 22;
// - [ref] / ATC from 20-20
String insLoc = "chr1";
int insLocStart = 20;
int insLocStop = 20;
Pair<List<Integer>,byte[]> result;
byte[] refBytes = "TATCATCATCGGA".getBytes();
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes(), true),4);
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes(), true),0);
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes(), true),1);
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes(), true),1);
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes(), true),2);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],4);
Assert.assertEquals(result.getSecond().length,3);
// ATC*,A,ATCATC
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],2);
Assert.assertEquals(result.getFirst().toArray()[2],4);
Assert.assertEquals(result.getSecond().length,3);
// simple non-tandem deletion: CCCC*, -
refBytes = "TCCCCCCCCATG".getBytes();
vc = new VariantContextBuilder("foo", delLoc, 10, 14, Arrays.asList(ccccR,nullA)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],8);
Assert.assertEquals(result.getFirst().toArray()[1],4);
Assert.assertEquals(result.getSecond().length,1);
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],7);
Assert.assertEquals(result.getFirst().toArray()[1],5);
Assert.assertEquals(result.getFirst().toArray()[2],3);
Assert.assertEquals(result.getFirst().toArray()[3],9);
Assert.assertEquals(result.getSecond().length,1);
// GAGA*,-,GAGAGAGA
refBytes = "TGAGAGAGAGATTT".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],5);
Assert.assertEquals(result.getFirst().toArray()[1],3);
Assert.assertEquals(result.getFirst().toArray()[2],7);
Assert.assertEquals(result.getSecond().length,2);
}
}

View File

@ -231,9 +231,9 @@ public class VariantContextBenchmark extends SimpleBenchmark {
toMerge.add(new VariantContextBuilder(vc).genotypes(gc).make()); toMerge.add(new VariantContextBuilder(vc).genotypes(gc).make());
} }
VariantContextUtils.simpleMerge(toMerge, null, GATKVariantContextUtils.simpleMerge(toMerge, null,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.UNSORTED, GATKVariantContextUtils.GenotypeMergeType.UNSORTED,
true, false, "set", false, true); true, false, "set", false, true);
} }
}; };

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.variant; package org.broadinstitute.variant;
import org.testng.Assert; import org.testng.Assert;

View File

@ -32,7 +32,6 @@ import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.bcf2.BCF2Codec; import org.broadinstitute.variant.bcf2.BCF2Codec;
import org.broadinstitute.variant.utils.GeneralUtils; import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.writer.Options; import org.broadinstitute.variant.variantcontext.writer.Options;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.testng.Assert; import org.testng.Assert;
@ -74,6 +73,24 @@ public class VariantContextTestProvider {
} }
} }
public static class VariantContextContainer {
private VCFHeader header;
private Iterable<VariantContext> vcs;
public VariantContextContainer( VCFHeader header, Iterable<VariantContext> vcs ) {
this.header = header;
this.vcs = vcs;
}
public VCFHeader getHeader() {
return header;
}
public Iterable<VariantContext> getVCs() {
return vcs;
}
}
public abstract static class VariantContextIOTest { public abstract static class VariantContextIOTest {
public String toString() { public String toString() {
return "VariantContextIOTest:" + getExtension(); return "VariantContextIOTest:" + getExtension();
@ -150,15 +167,15 @@ public class VariantContextTestProvider {
if ( ENABLE_SOURCE_VCF_TESTS ) { if ( ENABLE_SOURCE_VCF_TESTS ) {
for ( final File file : testSourceVCFs ) { for ( final File file : testSourceVCFs ) {
VCFCodec codec = new VCFCodec(); VCFCodec codec = new VCFCodec();
Pair<VCFHeader, Iterable<VariantContext>> x = readAllVCs( file, codec ); VariantContextContainer x = readAllVCs( file, codec );
List<VariantContext> fullyDecoded = new ArrayList<VariantContext>(); List<VariantContext> fullyDecoded = new ArrayList<VariantContext>();
for ( final VariantContext raw : x.getSecond() ) { for ( final VariantContext raw : x.getVCs() ) {
if ( raw != null ) if ( raw != null )
fullyDecoded.add(raw.fullyDecode(x.getFirst(), false)); fullyDecoded.add(raw.fullyDecode(x.getHeader(), false));
} }
TEST_DATAs.add(new VariantContextTestData(x.getFirst(), fullyDecoded)); TEST_DATAs.add(new VariantContextTestData(x.getHeader(), fullyDecoded));
} }
} }
} }
@ -616,8 +633,8 @@ public class VariantContextTestProvider {
writeVCsToFile(writer, header, data.vcs); writeVCsToFile(writer, header, data.vcs);
// ensure writing of expected == actual // ensure writing of expected == actual
final Pair<VCFHeader, Iterable<VariantContext>> p = readAllVCs(tmpFile, tester.makeCodec()); final VariantContextContainer p = readAllVCs(tmpFile, tester.makeCodec());
final Iterable<VariantContext> actual = p.getSecond(); final Iterable<VariantContext> actual = p.getVCs();
int i = 0; int i = 0;
for ( final VariantContext readVC : actual ) { for ( final VariantContext readVC : actual ) {
@ -655,14 +672,14 @@ public class VariantContextTestProvider {
writeVCsToFile(writer, header, vcs); writeVCsToFile(writer, header, vcs);
// ensure writing of expected == actual // ensure writing of expected == actual
final Pair<VCFHeader, Iterable<VariantContext>> p = readAllVCs(tmpFile, tester.makeCodec()); final VariantContextContainer p = readAllVCs(tmpFile, tester.makeCodec());
final Iterable<VariantContext> actual = p.getSecond(); final Iterable<VariantContext> actual = p.getVCs();
assertEquals(actual, expected); assertEquals(actual, expected);
if ( recurse ) { if ( recurse ) {
// if we are doing a recursive test, grab a fresh iterator over the written values // if we are doing a recursive test, grab a fresh iterator over the written values
final Iterable<VariantContext> read = readAllVCs(tmpFile, tester.makeCodec()).getSecond(); final Iterable<VariantContext> read = readAllVCs(tmpFile, tester.makeCodec()).getVCs();
testReaderWriter(tester, p.getFirst(), expected, read, false); testReaderWriter(tester, p.getHeader(), expected, read, false);
} }
} }
@ -683,7 +700,7 @@ public class VariantContextTestProvider {
* @return * @return
* @throws IOException * @throws IOException
*/ */
public final static Pair<VCFHeader, Iterable<VariantContext>> readAllVCs( final File source, final FeatureCodec<VariantContext> codec ) throws IOException { public final static VariantContextContainer readAllVCs( final File source, final FeatureCodec<VariantContext> codec ) throws IOException {
// read in the features // read in the features
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source)); PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
FeatureCodecHeader header = codec.readHeader(pbs); FeatureCodecHeader header = codec.readHeader(pbs);
@ -693,7 +710,7 @@ public class VariantContextTestProvider {
pbs.skip(header.getHeaderEnd()); pbs.skip(header.getHeaderEnd());
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue(); final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
return new Pair<VCFHeader, Iterable<VariantContext>>(vcfHeader, new VCIterable(pbs, codec, vcfHeader)); return new VariantContextContainer(vcfHeader, new VCIterable(pbs, codec, vcfHeader));
} }
public static class VCIterable implements Iterable<VariantContext>, Iterator<VariantContext> { public static class VCIterable implements Iterable<VariantContext>, Iterator<VariantContext> {
@ -738,10 +755,10 @@ public class VariantContextTestProvider {
} }
public static void assertVCFandBCFFilesAreTheSame(final File vcfFile, final File bcfFile) throws IOException { public static void assertVCFandBCFFilesAreTheSame(final File vcfFile, final File bcfFile) throws IOException {
final Pair<VCFHeader, Iterable<VariantContext>> vcfData = readAllVCs(vcfFile, new VCFCodec()); final VariantContextContainer vcfData = readAllVCs(vcfFile, new VCFCodec());
final Pair<VCFHeader, Iterable<VariantContext>> bcfData = readAllVCs(bcfFile, new BCF2Codec()); final VariantContextContainer bcfData = readAllVCs(bcfFile, new BCF2Codec());
assertEquals(bcfData.getFirst(), vcfData.getFirst()); assertEquals(bcfData.getHeader(), vcfData.getHeader());
assertEquals(bcfData.getSecond(), vcfData.getSecond()); assertEquals(bcfData.getVCs(), vcfData.getVCs());
} }
public static void assertEquals(final Iterable<VariantContext> actual, final Iterable<VariantContext> expected) { public static void assertEquals(final Iterable<VariantContext> actual, final Iterable<VariantContext> expected) {

View File

@ -28,9 +28,7 @@ package org.broadinstitute.variant.variantcontext;
// the imports for unit testing. // the imports for unit testing.
import org.broadinstitute.variant.VariantBaseTest; import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.utils.Pair;
import org.testng.annotations.BeforeSuite; import org.testng.annotations.BeforeSuite;
import org.testng.annotations.BeforeMethod; import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
@ -484,78 +482,6 @@ public class VariantContextUnitTest extends VariantBaseTest {
Assert.assertNotNull(vc.getFiltersMaybeNull()); Assert.assertNotNull(vc.getFiltersMaybeNull());
} }
@Test
public void testRepeatAllele() {
Allele nullR = Allele.create("A", true);
Allele nullA = Allele.create("A", false);
Allele atc = Allele.create("AATC", false);
Allele atcatc = Allele.create("AATCATC", false);
Allele ccccR = Allele.create("ACCCC", true);
Allele cc = Allele.create("ACC", false);
Allele cccccc = Allele.create("ACCCCCC", false);
Allele gagaR = Allele.create("AGAGA", true);
Allele gagagaga = Allele.create("AGAGAGAGA", false);
Pair<List<Integer>,byte[]> result;
byte[] refBytes = "TATCATCATCGGA".getBytes();
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes(), true),4);
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes(), true),0);
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes(), true),1);
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes(), true),1);
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes(), true),2);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],4);
Assert.assertEquals(result.getSecond().length,3);
// ATC*,A,ATCATC
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],2);
Assert.assertEquals(result.getFirst().toArray()[2],4);
Assert.assertEquals(result.getSecond().length,3);
// simple non-tandem deletion: CCCC*, -
refBytes = "TCCCCCCCCATG".getBytes();
vc = new VariantContextBuilder("foo", delLoc, 10, 14, Arrays.asList(ccccR,nullA)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],8);
Assert.assertEquals(result.getFirst().toArray()[1],4);
Assert.assertEquals(result.getSecond().length,1);
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],7);
Assert.assertEquals(result.getFirst().toArray()[1],5);
Assert.assertEquals(result.getFirst().toArray()[2],3);
Assert.assertEquals(result.getFirst().toArray()[3],9);
Assert.assertEquals(result.getSecond().length,1);
// GAGA*,-,GAGAGAGA
refBytes = "TGAGAGAGAGATTT".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],5);
Assert.assertEquals(result.getFirst().toArray()[1],3);
Assert.assertEquals(result.getFirst().toArray()[2],7);
Assert.assertEquals(result.getSecond().length,2);
}
@Test @Test
public void testGetGenotypeCounts() { public void testGetGenotypeCounts() {
List<Allele> alleles = Arrays.asList(Aref, T); List<Allele> alleles = Arrays.asList(Aref, T);