org.broadinstitute.variant refactor, part 2

-removed sting dependencies from test classes
-removed org.apache.log4j dependency
-misc cleanup
This commit is contained in:
David Roazen 2013-01-08 14:45:50 -05:00
parent 1599c9a20e
commit f63f27aa13
104 changed files with 979 additions and 825 deletions

View File

@ -57,7 +57,7 @@ import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.MannWhitneyU;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.variant.variantcontext.Allele;

View File

@ -53,11 +53,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLineCount;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.Arrays;
@ -79,7 +79,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
if ( !vc.isIndel())
return null;
Pair<List<Integer>,byte[]> result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
Pair<List<Integer>,byte[]> result = VariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
if (result == null)
return null;

View File

@ -63,7 +63,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

View File

@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.samtools.SAMFileHeader;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -46,7 +46,7 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -52,7 +52,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;

View File

@ -53,7 +53,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -56,7 +56,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.variant.variantcontext.*;

View File

@ -49,8 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
@ -172,7 +172,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
*/
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
public int samplePloidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
public int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY;
@Hidden
@Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false)

View File

@ -61,7 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@ -304,7 +304,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
// add the pool values for each genotype
if (UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY) {
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
}

View File

@ -61,7 +61,6 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -135,7 +134,7 @@ public class UnifiedGenotyperEngine {
// ---------------------------------------------------------------------------------------------------------
@Requires({"toolkit != null", "UAC != null"})
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), GATKVariantContextUtils.DEFAULT_PLOIDY);
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY);
}
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
@ -526,7 +525,7 @@ public class UnifiedGenotyperEngine {
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
@ -663,7 +662,7 @@ public class UnifiedGenotyperEngine {
private void determineGLModelsToUse() {
String modelPrefix = "";
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY )
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY )
modelPrefix = GPSTRING;
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {

View File

@ -47,7 +47,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import java.util.*;
@ -106,7 +105,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
alleles.add(vc.getReference());
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
builder.alleles(alleles);
builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
return builder.make();
} else {
return vc;
@ -352,6 +351,6 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
final List<Allele> allelesToUse,
final boolean assignGenotypes,
final int ploidy) {
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
}
}

View File

@ -47,10 +47,10 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.ArrayList;
@ -92,7 +92,7 @@ abstract class ExactAFCalc extends AFCalc {
if ( sample.hasLikelihoods() ) {
double[] gls = sample.getLikelihoods().getAsVector();
if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL )
genotypeLikelihoods.add(gls);
}
}

View File

@ -48,7 +48,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.variantcontext.*;
@ -554,7 +553,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
}
// if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
}
else {
@ -566,7 +565,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
gb.PL(newLikelihoods);
// if we weren't asked to assign a genotype, then just no-call the sample
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL )
gb.alleles(NO_CALL_ALLELES);
else
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);

View File

@ -47,7 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.VariantContext;

View File

@ -57,7 +57,6 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.*;
@ -204,7 +203,7 @@ public class GenotypingEngine {
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
annotatedCall = VariantContextUtils.reverseTrimAlleles(annotatedCall);
}
returnCalls.add( annotatedCall );

View File

@ -72,7 +72,6 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
@ -298,7 +297,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
samplesList.addAll( samples );
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
@ -308,7 +307,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
simpleUAC.CONTAMINATION_FRACTION = 0.0;
simpleUAC.exactCallsLog = null;
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
// initialize the output VCF header
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());

View File

@ -63,7 +63,7 @@ import org.broadinstitute.sting.gatk.walkers.BAQMode;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;

View File

@ -328,7 +328,7 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
if (vcfWriter != null) {
Map<String, VCFHeader> header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName());
samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), logger);
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), true);
headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
}

View File

@ -61,7 +61,6 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.vcf.*;
@ -116,7 +115,7 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));

View File

@ -51,7 +51,7 @@ import com.google.java.contract.Requires;
import org.apache.commons.math.MathException;
import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collection;

View File

@ -58,7 +58,7 @@ import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;

View File

@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;

View File

@ -49,7 +49,7 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.Arrays;
@ -73,7 +73,7 @@ public class RepeatLengthCovariate implements ExperimentalCovariate {
int maxRL = 0;
for (int str = 1; str <= 8; str++) {
if (i + str <= readBytes.length) {
maxRL = Math.max(maxRL, GATKVariantContextUtils.findNumberofRepetitions(
maxRL = Math.max(maxRL, VariantContextUtils.findNumberofRepetitions(
Arrays.copyOfRange(readBytes, i, i + str),
Arrays.copyOfRange(readBytes, i, readBytes.length)
));

View File

@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.variant.variantcontext.*;
import org.testng.Assert;

View File

@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.testng.Assert;

View File

@ -52,7 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;

View File

@ -164,7 +164,7 @@ public class CombineVariantsUnitTest {
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
headers.add(one);
headers.add(two);
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, false);
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
}
@ -175,7 +175,7 @@ public class CombineVariantsUnitTest {
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
headers.add(one);
headers.add(two);
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, false);
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
}
@ -186,7 +186,7 @@ public class CombineVariantsUnitTest {
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
headers.add(one);
headers.add(two);
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, false);
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
}
}

View File

@ -46,20 +46,16 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import com.sun.org.apache.xpath.internal.operations.Gt;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.variantutils.ConcordanceMetrics;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
import org.broadinstitute.variant.variantcontext.GenotypeType;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.vcf.VCFCodec;
@ -67,7 +63,6 @@ import org.broadinstitute.variant.vcf.VCFHeader;
import org.testng.annotations.Test;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.variant.vcf.*;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@ -75,7 +70,6 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.StringBufferInputStream;
import java.util.ArrayList;
import java.util.Set;
import java.util.Arrays;
import java.util.List;
import net.sf.picard.reference.ReferenceSequenceFile;

View File

@ -31,7 +31,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails;

View File

@ -26,7 +26,7 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import java.util.*;

View File

@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;

View File

@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.file.FSLockWithShared;

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.recalibration.BQSRMode;

View File

@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics;
import ca.mcgill.mcb.pcingola.interval.Intron;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Output;
@ -36,17 +35,13 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.io.*;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
* Prints an intervals file with all the variant sites that have "most" (>= 90% by default) of the samples with "good" (>= 10 by default) coverage ("most" and "good" can be set on the command line).

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.variant.variantcontext.VariantContext;

View File

@ -32,9 +32,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;

View File

@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.Collections;

View File

@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -45,7 +45,7 @@ import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ClippingOp;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

View File

@ -49,7 +49,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.VariantEvalUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -198,7 +197,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(shortName="ploidy", fullName="samplePloidy", doc="Per-sample ploidy (number of chromosomes per sample)", required=false)
protected int ploidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
protected int ploidy = VariantContextUtils.DEFAULT_PLOIDY;
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
private File ancestralAlignmentsFile = null;

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;

View File

@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
import java.util.Arrays;
import java.util.List;
@ -51,7 +51,7 @@ public class TandemRepeat extends VariantStratifier {
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
if ( eval == null || ! eval.isIndel() )
return ALL;
else if ( GATKVariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
else if ( VariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
print("REPEAT", eval, ref);
return REPEAT;
} else {

View File

@ -27,9 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;

View File

@ -217,7 +217,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
if ( SET_KEY.toLowerCase().equals("null") )
SET_KEY = null;
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
if ( SET_KEY != null )
headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants"));
if ( !ASSUME_IDENTICAL_SAMPLES )

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFHeader;

View File

@ -153,7 +153,7 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
headerLines.add(new VCFHeaderLine(VCFHeader.SOURCE_KEY, "SelectHeaders"));

View File

@ -400,7 +400,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
}
// Initialize VCF header
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
if (KEEP_ORIGINAL_CHR_COUNTS) {

View File

@ -36,9 +36,9 @@ import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.variant.bcf2.BCF2Codec;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.vcf.VCFCodec;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.writer.Options;

View File

@ -33,7 +33,7 @@ import org.apache.commons.math.MathException;
import org.apache.commons.math.distribution.NormalDistribution;
import org.apache.commons.math.distribution.NormalDistributionImpl;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.io.Serializable;

View File

@ -1,34 +1,34 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.*;

View File

@ -29,8 +29,8 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.text.ListFileUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;

View File

@ -32,7 +32,7 @@ import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -29,7 +29,7 @@ import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -30,7 +30,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -37,7 +37,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;

View File

@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.utils.BaseUtils;

View File

@ -31,7 +31,7 @@ import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.vcf.*;
@ -147,32 +147,4 @@ public class GATKVCFUtils {
return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary());
}
/**
 * Read all of the VCF records from source into memory, returning the header and the VariantContexts
 *
 * The file is opened twice: once to parse the header, and once more to skip past the header
 * and decode the records that follow it.  Each stream is closed before this method returns,
 * including when reading or decoding fails part-way through.
 *
 * @param source the file to read, must be in VCF4 format
 * @return a pair holding the parsed VCF header (first) and the decoded records in file order (second)
 * @throws java.io.IOException if the file cannot be opened or read
 */
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
    final VCFCodec codec = new VCFCodec();

    // first pass: parse only the header so that we learn where the records begin
    final FeatureCodecHeader header;
    PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
    try {
        header = codec.readHeader(pbs);
    } finally {
        pbs.close();
    }

    // second pass: reopen the file, jump over the header and decode every record
    final List<VariantContext> vcs = new ArrayList<VariantContext>();
    final VCFHeader vcfHeader;
    pbs = new PositionalBufferedStream(new FileInputStream(source));
    try {
        pbs.skip(header.getHeaderEnd());
        vcfHeader = (VCFHeader)header.getHeaderValue();

        while ( ! pbs.isDone() ) {
            final VariantContext vc = codec.decode(pbs);
            // the codec may hand back null for a line; such lines are simply dropped
            if ( vc != null )
                vcs.add(vc);
        }
    } finally {
        // previously this second stream was never closed, leaking a file handle per call
        pbs.close();
    }

    return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
}
}

View File

@ -25,22 +25,12 @@
package org.broadinstitute.sting.utils.variant;
import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.variantcontext.*;
import java.util.*;
public class GATKVariantContextUtils {
public static final int DEFAULT_PLOIDY = 2;
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
/**
* create a genome location, given a variant context
* @param genomeLocParser parser
@ -51,425 +41,4 @@ public class GATKVariantContextUtils {
return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true);
}
/**
 * Tests whether VC is an indel in which every alternate allele is an expansion or contraction
 * of bases that are repeated in the reference.
 *
 * For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT
 *
 * If VC = -/CT, then this function returns true because the CT insertion matches exactly the
 * upcoming reference.
 * If VC = -/CTA then this function returns false because the CTA isn't a perfect match
 *
 * The same logic applies to deletions: if VC = CT/- this returns true.
 *
 * The first base of the supplied reference bases is treated as padding and dropped.  Each
 * alternate allele is then checked against the reference allele with isRepeatAllele(); the
 * variant is a tandem repeat only if every alternate allele passes that check.
 *
 * @param vc the variant to test; anything that is not an indel yields false
 * @param refBasesStartingAtVCWithPad reference bases starting at the variant, INCLUDING the padding base
 * @return true if vc is an indel and all of its alternate alleles are repeat alleles
 */
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
    // strip the padding base; done ahead of the indel test, exactly as before
    final String paddedRef = new String(refBasesStartingAtVCWithPad);
    final String unpaddedRef = paddedRef.substring(1);

    if ( vc.isIndel() ) {
        final Allele refAllele = vc.getReference();

        for ( final Allele altAllele : vc.getAlternateAlleles() ) {
            final boolean repeats = isRepeatAllele(refAllele, altAllele, unpaddedRef);
            if ( ! repeats )
                return false;
        }

        // no alternate allele failed the test, so this is a repeat
        return true;
    }

    // only indels can be tandem repeats
    return false;
}
/**
 * Counts the tandem repeat units carried by the reference allele and by each alternate allele of an indel.
 *
 * The leading (padding) base is removed from the reference context and from every allele before
 * the counts are computed by the byte[]-based overload of this method.
 *
 * @param vc the variant to analyse
 * @param refBasesStartingAtVCWithPad reference bases starting at the variant, including the padding base
 * @return null if vc is not an indel, or if the count for the reference or for any alternate allele is 0;
 *         otherwise a pair whose first element lists the reference count followed by one count per
 *         alternate allele, and whose second element is the repeat unit found for the last alternate allele
 */
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
public static Pair<List<Integer>,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
    final boolean VERBOSE = false;
    final String unpaddedContext = new String(refBasesStartingAtVCWithPad).substring(1);

    // only indels are tandem repeats
    if ( ! vc.isIndel() )
        return null;

    final Allele refAllele = vc.getReference();
    final byte[] unpaddedRefAllele = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());

    final ArrayList<Integer> unitCounts = new ArrayList<Integer>();
    byte[] repeatUnit = null;

    for ( final Allele altAllele : vc.getAlternateAlleles() ) {
        final byte[] unpaddedAltAllele = Arrays.copyOfRange(altAllele.getBases(), 1, altAllele.length());
        final Pair<int[],byte[]> countsAndUnit = getNumTandemRepeatUnits(unpaddedRefAllele, unpaddedAltAllele, unpaddedContext.getBytes());

        final int refCount = countsAndUnit.first[0];
        final int altCount = countsAndUnit.first[1];

        // a count of 0 means the allele is not a tandem expansion of the context
        if ( refCount == 0 || altCount == 0 )
            return null;

        // the reference count is recorded once, ahead of the first alternate count
        if ( unitCounts.isEmpty() )
            unitCounts.add(refCount);
        unitCounts.add(altCount);

        repeatUnit = countsAndUnit.second;

        if ( VERBOSE ) {
            System.out.println("RefContext:"+unpaddedContext);
            System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(refCount));
            System.out.println("Allele:"+altAllele.toString()+" Count:" + String.valueOf(altCount));
            System.out.println("RU:"+new String(repeatUnit));
        }
    }

    return new Pair<List<Integer>, byte[]>(unitCounts, repeatUnit);
}
/**
 * Determines the repeat unit of an indel and how often that unit is repeated for the ref and alt alleles.
 *
 * The logic of basesAreRepeated() cannot be reused as-is to compute the tandem unit and the number
 * of repeated units.  Consider the case where ref = ATATAT and we have an insertion of ATAT: the
 * natural description is (AT)3 -> (AT)5.
 *
 * The repeat unit is derived from whichever allele is longer (the reference allele on a tie); for
 * example CACA is first decomposed into (CA)2.  Each reported count is the number of leading units
 * in "allele + remaining reference context" minus the number of leading units in refBases alone.
 *
 * @param refBases bases of the reference allele
 * @param altBases bases of the alternate allele
 * @param remainingRefContext reference bases that are appended to each allele before counting
 * @return a pair of { ref count, alt count } and the repeat unit
 */
protected static Pair<int[],byte[]> getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) {
    // the unit comes from the longer allele; the alt allele must be strictly longer to be chosen
    final byte[] longerAllele = altBases.length > refBases.length ? altBases : refBases;

    // see if that allele can be decomposed into several identical tandem units
    final int unitLength = findRepeatedSubstring(longerAllele);
    final byte[] repeatUnit = Arrays.copyOf(longerAllele, unitLength);

    final int repetitionsInRef = findNumberofRepetitions(repeatUnit, refBases);

    final byte[] refWithContext = ArrayUtils.addAll(refBases, remainingRefContext);
    final int refRepetitions = findNumberofRepetitions(repeatUnit, refWithContext) - repetitionsInRef;

    final byte[] altWithContext = ArrayUtils.addAll(altBases, remainingRefContext);
    final int altRepetitions = findNumberofRepetitions(repeatUnit, altWithContext) - repetitionsInRef;

    final int[] repetitionCount = { refRepetitions, altRepetitions };
    return new Pair<int[], byte[]>(repetitionCount, repeatUnit);
}
/**
 * Finds the length of the shortest prefix whose back-to-back repetition spells out the whole input.
 * For example ACTACT is a 2-tandem of ACT, so the answer is 3,
 * whereas ACTACA is not a tandem of anything shorter, so the answer is 6.
 *
 * @param bases String to be tested
 * @return Length of the repeat unit; when the input is not a tandem of a shorter unit this is the
 *         length of the input itself.  An empty input yields 1.
 */
public static int findRepeatedSubstring(byte[] bases) {
    int unitLength = 1;
    while ( unitLength <= bases.length ) {
        final byte[] unit = Arrays.copyOf(bases, unitLength);

        // walk the rest of the input one unit-sized chunk at a time until a chunk differs
        boolean tiles = true;
        int offset = unitLength;
        while ( tiles && offset < bases.length ) {
            // a chunk running past the end is zero-padded by copyOfRange before the comparison
            final byte[] chunk = Arrays.copyOfRange(bases, offset, offset + unitLength);
            tiles = Arrays.equals(unit, chunk);
            offset += unitLength;
        }

        if ( tiles )
            return unitLength;
        unitLength++;
    }

    // only reached for an empty input, where no candidate length was ever tried
    return unitLength;
}
/**
 * Helper routine that counts how many consecutive copies of a repeat unit a string starts with.
 * For example, for string ATAT and repeat unit AT, number of repetitions = 2;
 * for string ATATGC and repeat unit AT it is also 2, because counting stops at the first
 * chunk that differs from the unit.
 *
 * @param repeatUnit Substring
 * @param testString String to test
 * @return Number of leading repetitions of repeatUnit in testString (0 if testString does not
 *         start with repeatUnit, or if either array is empty)
 */
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
    // An empty unit would never advance the scan below (the step would be 0), which used to
    // spin forever on any non-empty testString.  Nothing can be counted in that case.
    if ( testString.length == 0 || repeatUnit.length == 0 )
        return 0;

    int numRepeats = 0;
    for ( int start = 0; start < testString.length; start += repeatUnit.length ) {
        // a trailing chunk shorter than the unit is zero-padded by copyOfRange before comparing
        final byte[] chunk = Arrays.copyOfRange(testString, start, start + repeatUnit.length);
        if ( ! Arrays.equals(chunk, repeatUnit) )
            break;
        numRepeats++;
    }
    return numRepeats;
}
/**
 * Helper function for isTandemRepeat that checks a single ref/alt allele pair against the reference.
 *
 * One allele has to be a prefix of the other.  The bases by which the longer allele extends the
 * shorter one must then occur at the start of the supplied reference bases: twice in a row when
 * the reference allele is the longer one (a deletion), once otherwise (an insertion).
 *
 * @param ref the reference allele
 * @param alt the alternate allele
 * @param refBasesStartingAtVCWithoutPad reference bases starting at the variant, without the padding base
 * @return true if the allele pair passes the repeat test described above
 */
protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) {
    if ( Allele.oneIsPrefixOfOther(ref, alt) ) {
        final boolean isDeletion = ref.length() > alt.length();
        return isDeletion
                ? basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2)
                : basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1);
    }

    // neither allele is a prefix of the other, so this cannot be a simple repeat
    return false;
}
/**
 * Checks that the bases by which l extends s occur back-to-back at the start of ref.
 *
 * @param l the longer string; everything after its first s.length() characters is the candidate repeat
 * @param s the shorter string, used only for its length
 * @param ref the bases to search, starting at offset 0
 * @param minNumberOfMatches how many consecutive copies of the candidate repeat ref must begin with
 * @return true if ref begins with minNumberOfMatches consecutive copies of the candidate repeat;
 *         false on the first mismatch or when ref runs out of bases
 */
protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) {
    final String repeatCandidate = l.substring(s.length()); // skip s bases
    final int unitLength = repeatCandidate.length();

    int copiesSeen = 0;
    while ( copiesSeen < minNumberOfMatches ) {
        // startsWith fails both when ref is too short at this offset and when the bases differ
        if ( ! ref.startsWith(repeatCandidate, copiesSeen * unitLength) )
            return false;
        copiesSeen++;
    }

    return true; // every required copy was present
}
/**
 * Assign genotypes (GTs) to the samples in the Variant Context based on the PLs, keeping every
 * allele that is already present on the variant context.
 *
 * This is subsetDiploidAlleles() invoked with the full allele list and genotype assignment enabled.
 *
 * @param vc variant context with genotype likelihoods
 * @return genotypes context
 */
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
    final List<Allele> everyAllele = vc.getAlleles();
    final boolean assignGenotypes = true;
    return subsetDiploidAlleles(vc, everyAllele, assignGenotypes);
}
/**
 * Split variant context into its biallelic components if there are more than 2 alleles
 *
 * For a VC with A/B/C alleles, returns A/B and A/C contexts.
 * Genotypes are not assigned for the split records (subsetting is done with assignGenotypes = false)
 * Alleles are right trimmed via reverseTrimAlleles()
 *
 * If vc is biallelic or non-variant it is returned as-is, wrapped in a singleton list
 *
 * Chromosome counts are recalculated on each split record
 *
 * @param vc a potentially multi-allelic variant context
 * @return a list of bi-allelic (or monomorphic) variant contexts
 */
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
    final boolean needsSplitting = vc.isVariant() && ! vc.isBiallelic();
    if ( ! needsSplitting ) {
        // non variant or biallelics already satisfy the contract
        return Collections.singletonList(vc);
    }

    final List<VariantContext> splitRecords = new LinkedList<VariantContext>();
    final Allele refAllele = vc.getReference();

    for ( final Allele altAllele : vc.getAlternateAlleles() ) {
        final VariantContextBuilder biallelicBuilder = new VariantContextBuilder(vc);
        final List<Allele> refAndAlt = Arrays.asList(refAllele, altAllele);

        biallelicBuilder.alleles(refAndAlt);
        biallelicBuilder.genotypes(subsetDiploidAlleles(vc, refAndAlt, false));
        VariantContextUtils.calculateChromosomeCounts(biallelicBuilder, true);

        final VariantContext untrimmed = biallelicBuilder.make();
        splitRecords.add(reverseTrimAlleles(untrimmed));
    }

    return splitRecords;
}
/**
* Subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
*
* Samples are visited in sample-name order.  A sample without likelihoods, or whose (possibly
* subsetted and re-normalized) likelihoods sum to more than SUM_GL_THRESH_NOCALL, becomes a plain
* no-call genotype.  Every other sample keeps its original genotype fields, has its PLs replaced
* (or removed when no alternate allele remains), and is either no-called or given the allele
* pair with the maximum likelihood, depending on assignGenotypes.
*
* @param vc variant context with genotype likelihoods
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
* @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
* @return genotypes
*/
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
final List<Allele> allelesToUse,
final boolean assignGenotypes) {
// the genotypes with PLs
final GenotypesContext oldGTs = vc.getGenotypes();
// samples
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
// the new genotypes to create
final GenotypesContext newGTs = GenotypesContext.create();
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
// allelesToUse is counted as one reference allele plus the alternates to keep
final int numNewAltAlleles = allelesToUse.size() - 1;
// which PLs should be carried forward?
// (null means "keep the original PL vector unchanged")
ArrayList<Integer> likelihoodIndexesToUse = null;
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
// then we can keep the PLs as is; otherwise, we determine which ones to keep
if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
likelihoodIndexesToUse = new ArrayList<Integer>(30);
// flag, per original alternate allele, whether it survives the subsetting
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
altAlleleIndexToUse[i] = true;
}
// numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
// consider this entry only if both of the alleles are good
// (allele index 0 is always accepted; index i > 0 is looked up as alternate allele i - 1)
if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
likelihoodIndexesToUse.add(PLindex);
}
}
// create the new genotypes
for ( int k = 0; k < oldGTs.size(); k++ ) {
final Genotype g = oldGTs.get(sampleIndices.get(k));
// without likelihoods there is nothing to subset: emit a bare no-call for this sample
if ( !g.hasLikelihoods() ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
continue;
}
// create the new likelihoods array from the alleles we are allowed to use
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
double[] newLikelihoods;
if ( likelihoodIndexesToUse == null ) {
newLikelihoods = originalLikelihoods;
} else {
newLikelihoods = new double[likelihoodIndexesToUse.size()];
int newIndex = 0;
for ( int oldIndex : likelihoodIndexesToUse )
newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
// might need to re-normalize
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
}
// if there is no mass on the (new) likelihoods, then just no-call the sample
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
}
else {
// start from the existing genotype so that its other fields are carried over
final GenotypeBuilder gb = new GenotypeBuilder(g);
if ( numNewAltAlleles == 0 )
gb.noPL();
else
gb.PL(newLikelihoods);
// if we weren't asked to assign a genotype, then just no-call the sample
// NOTE(review): the sum(...) > SUM_GL_THRESH_NOCALL operand is always false here, because
// this is the else-branch of the identical comparison above; only !assignGenotypes matters.
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
gb.alleles(NO_CALL_ALLELES);
}
else {
// find the genotype with maximum likelihoods
// (with no alternate alleles left, PL index 0 is used without consulting the likelihoods)
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
// the genotype quality is only derived from the PLs when PLs were kept
if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
}
newGTs.add(gb.make());
}
}
return newGTs;
}
/**
 * Trims bases shared by the ends of all alleles, as reported by computeReverseClipping().
 *
 * Symbolic alleles are kept untouched; every other allele loses its last "trim extent" bases.
 * Genotypes are rebuilt against the trimmed alleles (uncalled alleles become Allele.NO_CALL) and
 * the stop position is recomputed from the start position and the length of the first trimmed allele.
 *
 * @param inputVC the variant context to trim
 * @return inputVC itself when there is nothing to trim or it has at most one allele,
 *         otherwise a new variant context carrying the trimmed alleles and genotypes
 */
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
    // see whether we need to trim common reference base from all alleles
    final byte[] refDisplayBases = inputVC.getReference().getDisplayString().getBytes();
    final int basesToTrim = computeReverseClipping(inputVC.getAlleles(), refDisplayBases, 0, false);
    final boolean nothingToDo = basesToTrim <= 0 || inputVC.getAlleles().size() <= 1;
    if ( nothingToDo )
        return inputVC;

    final List<Allele> trimmedAlleleList = new ArrayList<Allele>();
    final Map<Allele, Allele> trimmedByOriginal = new HashMap<Allele, Allele>();

    for ( final Allele original : inputVC.getAlleles() ) {
        // symbolic alleles map to themselves; everything else is rebuilt from its shortened bases
        final Allele replacement = original.isSymbolic()
                ? original
                : Allele.create(Arrays.copyOfRange(original.getBases(), 0, original.length() - basesToTrim), original.isReference());
        trimmedAlleleList.add(replacement);
        trimmedByOriginal.put(original, replacement);
    }

    // now we can recreate new genotypes with trimmed alleles
    final GenotypesContext trimmedGenotypes = GenotypesContext.create();
    for ( final Genotype genotype : inputVC.getGenotypes() ) {
        final List<Allele> genotypeAlleles = new ArrayList<Allele>();
        for ( final Allele called : genotype.getAlleles() )
            genotypeAlleles.add(called.isCalled() ? trimmedByOriginal.get(called) : Allele.NO_CALL);
        genotypes:
        trimmedGenotypes.add(new GenotypeBuilder(genotype).alleles(genotypeAlleles).make());
    }

    return new VariantContextBuilder(inputVC)
            .stop(inputVC.getStart() + trimmedAlleleList.get(0).length() - 1)
            .alleles(trimmedAlleleList)
            .genotypes(trimmedGenotypes)
            .make();
}
/**
 * Counts how many trailing bases of the non-symbolic alleles match the trailing bases of
 * {@code ref}, one trailing position per pass over the alleles. Symbolic alleles are ignored.
 *
 * @param unclippedAlleles the alleles to examine
 * @param ref              bases whose tail is compared against the tail of each allele
 * @param forwardClipping  the scan stops once an allele's remaining length (its length minus the
 *                         current clipping) is no greater than this value
 * @param allowFullClip    whether an allele, or {@code ref}, may be consumed completely
 * @return the number of trailing bases that can be clipped. When some allele would be consumed
 *         entirely, the result is that allele's length if {@code allowFullClip} is set and one
 *         less otherwise. Returns -1 when {@code ref} is exhausted and {@code allowFullClip} is
 *         not set, and 0 when there is no non-symbolic allele to examine.
 */
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
                                         final byte[] ref,
                                         final int forwardClipping,
                                         final boolean allowFullClip) {
    int clipping = 0;
    boolean stillClipping = true;

    while ( stillClipping ) {
        boolean sawConcreteAllele = false;

        for ( final Allele a : unclippedAlleles ) {
            if ( a.isSymbolic() )
                continue;
            sawConcreteAllele = true;

            // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
            // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
            if ( a.length() - clipping == 0 )
                return clipping - (allowFullClip ? 0 : 1);

            if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
                stillClipping = false;
            }
            else if ( ref.length == clipping ) {
                // the reference has no bases left to compare against
                if ( allowFullClip )
                    stillClipping = false;
                else
                    return -1;
            }
            else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
                // this allele disagrees with the reference at the current trailing position
                stillClipping = false;
            }
        }

        // Without at least one non-symbolic allele nothing above can ever stop the scan,
        // so bail out instead of incrementing the clipping forever.
        if ( ! sawConcreteAllele )
            return clipping;

        if ( stillClipping )
            clipping++;
    }

    return clipping;
}
}

View File

@ -27,13 +27,13 @@ package org.broadinstitute.variant.bcf2;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.variant.variantcontext.*;
@ -50,8 +50,6 @@ import java.util.Map;
* Decode BCF2 files
*/
public final class BCF2Codec implements FeatureCodec<VariantContext> {
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
private final static int ALLOWED_MAJOR_VERSION = 2;
private final static int MIN_MINOR_VERSION = 1;
@ -149,7 +147,9 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
logger.debug("Parsing data stream with BCF version " + bcfVersion);
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
System.err.println("Parsing data stream with BCF version " + bcfVersion);
}
final int headerSizeInBytes = BCF2Type.INT32.read(inputStream);

View File

@ -27,9 +27,8 @@ package org.broadinstitute.variant.bcf2;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.TribbleException;
import org.broadinstitute.variant.utils.GeneralUtils;
import java.io.ByteArrayInputStream;
import java.io.IOException;
@ -38,8 +37,6 @@ import java.util.ArrayList;
import java.util.Arrays;
public final class BCF2Decoder {
final protected static Logger logger = Logger.getLogger(FeatureCodec.class);
byte[] recordBytes = null;
ByteArrayInputStream recordStream = null;
@ -343,8 +340,9 @@ public final class BCF2Decoder {
bytesRead += read1;
}
if ( nReadAttempts > 1 ) // TODO -- remove me
logger.warn("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior");
if ( GeneralUtils.DEBUG_MODE_ENABLED && nReadAttempts > 1 ) { // TODO -- remove me
System.err.println("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior");
}
validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
} catch ( IOException e ) {

View File

@ -27,7 +27,6 @@ package org.broadinstitute.variant.bcf2;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.variant.variantcontext.Allele;
@ -46,7 +45,6 @@ import java.util.*;
* @since 6/12
*/
public class BCF2GenotypeFieldDecoders {
final protected static Logger logger = Logger.getLogger(BCF2GenotypeFieldDecoders.class);
private final static boolean ENABLE_FASTPATH_GT = true;
private final static int MIN_SAMPLES_FOR_FASTPATH_GENOTYPES = 0; // TODO -- update to reasonable number

View File

@ -26,7 +26,6 @@
package org.broadinstitute.variant.bcf2;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broad.tribble.TribbleException;
import org.broadinstitute.variant.variantcontext.*;
@ -40,8 +39,6 @@ import java.util.*;
* @since 5/12
*/
public class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
final protected static Logger logger = Logger.getLogger(BCF2LazyGenotypesDecoder.class);
// the essential information for us to use to decode the genotypes data
// initialized when this lazy decoder is created, as we know all of this from the BCF2Codec
// and its stored here again for code cleanliness
@ -63,8 +60,6 @@ public class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser
@Override
public LazyGenotypesContext.LazyData parse(final Object data) {
// if ( logger.isDebugEnabled() )
// logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
try {
// load our byte[] data into the decoder

View File

@ -25,10 +25,18 @@
package org.broadinstitute.variant.utils;
import java.util.Collection;
import java.util.Iterator;
import java.util.*;
public class Utils {
/**
* Constants and utility methods used throughout the VCF/BCF/VariantContext classes
*/
public class GeneralUtils {
/**
* Setting this to true causes the VCF/BCF/VariantContext classes to emit debugging information
* to standard error
*/
public static final boolean DEBUG_MODE_ENABLED = false;
/**
* The smallest log10 value we'll emit from normalizeFromLog10 and other functions
@ -66,7 +74,6 @@ public class Utils {
}
}
/**
* normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
*
@ -134,10 +141,21 @@ public class Utils {
return normalized;
}
/**
 * Adds up the entries of an array, in index order.
 *
 * @param values the numbers to add
 * @return the total, or 0.0 for an empty array
 */
public static double sum(double[] values) {
    double total = 0.0;
    for (int i = 0; i < values.length; i++) {
        total += values[i];
    }
    return total;
}
/**
 * Looks up the element at the position that {@code maxElementIndex} reports for the whole array.
 *
 * @param array the values to inspect
 * @return the element stored at the index returned by {@code maxElementIndex(array, array.length)}
 */
public static double arrayMax(final double[] array) {
    final int indexOfMax = maxElementIndex(array, array.length);
    return array[indexOfMax];
}
/**
 * Convenience overload that considers the entire array.
 *
 * @param array the values to inspect
 * @return the result of {@code maxElementIndex(array, array.length)}
 */
public static int maxElementIndex(final double[] array) {
    final int endIndex = array.length;
    return maxElementIndex(array, endIndex);
}
public static int maxElementIndex(final double[] array, final int endIndex) {
if (array == null || array.length == 0)
throw new IllegalArgumentException("Array cannot be null!");
@ -150,6 +168,82 @@ public class Utils {
return maxI;
}
/**
 * Builds a new list made of {@code elt} followed by the contents of {@code l}.
 *
 * @param elt the element that becomes the head of the new list
 * @param l   the elements that follow; may be null, in which case the result holds only {@code elt}
 * @param <T> the element type
 * @return a freshly allocated list; {@code l} itself is not modified
 */
public static <T> List<T> cons(final T elt, final List<T> l) {
    final List<T> combined = new ArrayList<T>();
    combined.add(elt);
    if (l == null) {
        return combined;
    }
    combined.addAll(l);
    return combined;
}
/**
 * Builds every ordered arrangement of {@code n} elements drawn from {@code objects}.
 *
 * For objects = [A, B, C]:
 *   n = 1 yields [[A], [B], [C]]
 *   n = 2 yields [[A, A], [B, A], [C, A], [A, B], [B, B], [C, B], [A, C], [B, C], [C, C]]
 *
 * @param objects         the elements to draw from
 * @param n               the size of each arrangement; values of zero or less yield an empty result
 * @param <T>             the element type
 * @param withReplacement if false, an element is never added to an arrangement that already
 *                        contains it (as judged by {@code List.contains})
 * @return the list of arrangements, each of size {@code n}
 */
public static <T> List<List<T>> makePermutations(final List<T> objects, final int n, final boolean withReplacement) {
    final List<List<T>> result = new ArrayList<List<T>>();

    if ( n <= 0 )
        return result;

    if ( n == 1 ) {
        for ( final T single : objects )
            result.add(Collections.singletonList(single));
        return result;
    }

    // extend every arrangement of size n - 1 by one more element at the front
    for ( final List<T> shorter : makePermutations(objects, n - 1, withReplacement) ) {
        for ( final T candidate : objects ) {
            if ( ! withReplacement && shorter.contains(candidate) )
                continue;
            result.add(cons(candidate, shorter));
        }
    }
    return result;
}
/**
 * Compares two doubles, treating them as equal when they differ by less than 1e-6.
 *
 * @param a the first double value
 * @param b the second double value
 * @return 0 if a and b are within 1e-6 of each other, -1 if a is greater than b, 1 otherwise
 */
public static byte compareDoubles(double a, double b) {
    final double defaultEpsilon = 1e-6;
    return compareDoubles(a, b, defaultEpsilon);
}
/**
 * Compares two doubles, treating them as equal when they differ by less than {@code epsilon}.
 *
 * Note the sign convention: a larger first argument yields -1, not 1.
 *
 * @param a       the first double value
 * @param b       the second double value
 * @param epsilon the precision within which two double values will be considered equal
 * @return 0 if |a - b| is less than epsilon, -1 if a is greater than b, 1 otherwise
 */
public static byte compareDoubles(double a, double b, double epsilon) {
    final boolean withinTolerance = Math.abs(a - b) < epsilon;
    if (withinTolerance) {
        return 0;
    }
    return (byte) (a > b ? -1 : 1);
}
/**
 * Produces a reversed copy of a list.
 *
 * @param l   the list to read; it is left unmodified
 * @param <T> the element type
 * @return a new list holding the elements of {@code l} from last to first
 */
static public final <T> List<T> reverse(final List<T> l) {
    final List<T> backwards = new ArrayList<T>(l.size());
    // walk the input from its end towards its start
    for (final ListIterator<T> cursor = l.listIterator(l.size()); cursor.hasPrevious(); ) {
        backwards.add(cursor.previous());
    }
    return backwards;
}
}

View File

@ -23,7 +23,7 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.collections;
package org.broadinstitute.variant.utils;
public class Pair<X,Y> {

View File

@ -28,7 +28,7 @@ package org.broadinstitute.variant.variantcontext;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broad.tribble.TribbleException;
import org.broadinstitute.variant.utils.Utils;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.VCFConstants;
import java.util.Arrays;
@ -155,7 +155,7 @@ public class GenotypeLikelihoods {
//Returns null in case of missing likelihoods
public EnumMap<GenotypeType,Double> getAsMap(boolean normalizeFromLog10){
//Make sure that the log10likelihoods are set
double[] likelihoods = normalizeFromLog10 ? Utils.normalizeFromLog10(getAsVector()) : getAsVector();
double[] likelihoods = normalizeFromLog10 ? GeneralUtils.normalizeFromLog10(getAsVector()) : getAsVector();
if(likelihoods == null)
return null;
EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
@ -215,7 +215,7 @@ public class GenotypeLikelihoods {
if (qual < 0) {
// QUAL can be negative if the chosen genotype is not the most likely one individually.
// In this case, we compute the actual genotype probability and QUAL is the likelihood of it not being the chosen one
double[] normalized = Utils.normalizeFromLog10(likelihoods);
double[] normalized = GeneralUtils.normalizeFromLog10(likelihoods);
double chosenGenotype = normalized[iOfChoosenGenotype];
return Math.log10(1.0 - chosenGenotype);
} else {

View File

@ -25,10 +25,10 @@
package org.broadinstitute.variant.variantcontext;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.*;
import java.util.*;
@ -202,7 +202,6 @@ import java.util.*;
public class VariantContext implements Feature { // to enable tribble integration
private final static boolean WARN_ABOUT_BAD_END = true;
private final static int MAX_ALLELE_SIZE_FOR_NON_SV = 150;
final protected static Logger logger = Logger.getLogger(VariantContext.class);
private boolean fullyDecoded = false;
protected CommonInfo commonInfo = null;
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
@ -1176,10 +1175,12 @@ public class VariantContext implements Feature { // to enable tribble integratio
final String message = "Badly formed variant context at location " + getChr() + ":"
+ getStart() + "; getEnd() was " + getEnd()
+ " but this VariantContext contains an END key with value " + end;
if ( WARN_ABOUT_BAD_END )
logger.warn(message);
else
if ( GeneralUtils.DEBUG_MODE_ENABLED && WARN_ABOUT_BAD_END ) {
System.err.println(message);
}
else {
throw new TribbleException(message);
}
}
} else {
final long length = (stop - start) + 1;

View File

@ -29,24 +29,27 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.commons.jexl2.Expression;
import org.apache.commons.jexl2.JexlEngine;
import org.apache.log4j.Logger;
import org.apache.commons.lang.ArrayUtils;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.utils.Utils;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.vcf.*;
import java.io.Serializable;
import java.util.*;
public class VariantContextUtils {
private static Logger logger = Logger.getLogger(VariantContextUtils.class);
public final static String MERGE_INTERSECTION = "Intersection";
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
public final static String MERGE_FILTER_PREFIX = "filterIn";
public static final int DEFAULT_PLOIDY = 2;
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
private static Set<String> MISSING_KEYS_WARNED_ABOUT = new HashSet<String>();
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
final public static JexlEngine engine = new JexlEngine();
private final static boolean ASSUME_MISSING_FIELDS_ARE_STRINGS = false;
@ -166,7 +169,8 @@ public class VariantContextUtils {
if ( ASSUME_MISSING_FIELDS_ARE_STRINGS ) {
if ( ! MISSING_KEYS_WARNED_ABOUT.contains(field) ) {
MISSING_KEYS_WARNED_ABOUT.add(field);
logger.warn("Field " + field + " missing from VCF header, assuming it is an unbounded string type");
if ( GeneralUtils.DEBUG_MODE_ENABLED )
System.err.println("Field " + field + " missing from VCF header, assuming it is an unbounded string type");
}
return new VCFInfoHeaderLine(field, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Auto-generated string header for " + field);
}
@ -176,6 +180,428 @@ public class VariantContextUtils {
return metaData;
}
/**
 * Tests whether VC is an indel in which every alternate allele is an expansion or contraction
 * of bases repeated in the reference, as judged by {@link #isRepeatAllele}.
 *
 * For example, suppose the ref bases are CTCTCTGA, which contain three copies of CT.
 *
 * If VC = -/CT the result is true because the inserted CT matches the upcoming reference,
 * whereas VC = -/CTA gives false because CTA is not a perfect match. The same reasoning
 * applies to deletions: VC = CT/- gives true.
 *
 * The leading byte of the supplied reference bases is dropped before any comparison is made.
 *
 * @param vc                          the variant context to test
 * @param refBasesStartingAtVCWithPad reference bases starting at the VC; note this is assumed to
 *                                    include the PADDED reference base
 * @return true only if vc is an indel and every alternate allele passes the repeat test
 */
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
    final String refContext = new String(refBasesStartingAtVCWithPad).substring(1);

    if ( vc.isIndel() ) {
        final Allele reference = vc.getReference();
        for ( final Allele alternate : vc.getAlternateAlleles() ) {
            if ( ! isRepeatAllele(reference, alternate, refContext) )
                return false;
        }
        // no alternate allele failed the test, so we are a repeat
        return true;
    }

    // only indels are tandem repeats
    return false;
}
/**
 * Computes, for an indel, the repeat-unit counts of the reference allele and of each alternate
 * allele, together with the repeat unit itself.
 *
 * The first base of every allele and the first byte of the supplied reference bases are dropped
 * before the counts are computed.
 *
 * @param vc                          the variant context to analyse
 * @param refBasesStartingAtVCWithPad reference bases starting at the VC, including one leading
 *                                    base that is skipped
 * @return null if vc is not an indel, or if the reference or any alternate allele has a count of
 *         zero; otherwise a pair whose first member lists the reference count followed by one
 *         count per alternate allele, and whose second member is the repeat unit found for the
 *         last alternate allele processed
 */
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
public static Pair<List<Integer>,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
    final boolean VERBOSE = false;
    final String refContext = new String(refBasesStartingAtVCWithPad).substring(1);

    // only indels are tandem repeats
    if ( ! vc.isIndel() )
        return null;

    final Allele refAllele = vc.getReference();
    final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());

    final ArrayList<Integer> lengths = new ArrayList<Integer>();
    byte[] repeatUnit = null;

    for ( final Allele altAllele : vc.getAlternateAlleles() ) {
        final byte[] altAlleleBases = Arrays.copyOfRange(altAllele.getBases(), 1, altAllele.length());
        final Pair<int[],byte[]> countsAndUnit = getNumTandemRepeatUnits(refAlleleBases, altAlleleBases, refContext.getBytes());

        final int refCount = countsAndUnit.first[0];
        final int altCount = countsAndUnit.first[1];

        // a count of zero means the allele is not a tandem expansion of the context
        if ( refCount == 0 || altCount == 0 )
            return null;

        // the reference count is recorded only once, ahead of the first alternate count
        if ( lengths.isEmpty() )
            lengths.add(refCount);
        lengths.add(altCount);

        repeatUnit = countsAndUnit.second;

        if (VERBOSE) {
            System.out.println("RefContext:"+refContext);
            System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(refCount));
            System.out.println("Allele:"+altAllele.toString()+" Count:" + String.valueOf(altCount));
            System.out.println("RU:"+new String(repeatUnit));
        }
    }

    return new Pair<List<Integer>, byte[]>(lengths, repeatUnit);
}
/**
 * Derives a repeat unit from the longer of the two alleles and counts how often that unit
 * repeats for the reference and for the alternate allele.
 *
 * We can't apply exactly the same logic as in basesAreRepeated() here. Consider the case where
 * ref = ATATAT and we have an insertion of ATAT: the natural description is (AT)3 -> (AT)5.
 *
 * @param refBases            bases of the reference allele
 * @param altBases            bases of the alternate allele
 * @param remainingRefContext reference bases that get appended to each allele before counting
 * @return a pair holding a two-element count array (index 0 for the reference, index 1 for the
 *         alternate allele) and the repeat unit. Each count is the number of leading repeat units
 *         in the allele followed by the context, minus the number of leading repeat units in
 *         {@code refBases} alone.
 */
protected static Pair<int[],byte[]> getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) {
    // the repeat unit is taken from whichever allele is longer; on a tie the reference is used
    final byte[] longerAllele = altBases.length > refBases.length ? altBases : refBases;

    // decompose the longer allele into identical tandem units, e.g. CACA becomes (CA)2
    final int unitLength = findRepeatedSubstring(longerAllele);
    final byte[] repeatUnit = Arrays.copyOf(longerAllele, unitLength);

    final int repetitionsInRef = findNumberofRepetitions(repeatUnit, refBases);
    final byte[] refWithContext = ArrayUtils.addAll(refBases, remainingRefContext);
    final int refCount = findNumberofRepetitions(repeatUnit, refWithContext) - repetitionsInRef;
    final byte[] altWithContext = ArrayUtils.addAll(altBases, remainingRefContext);
    final int altCount = findNumberofRepetitions(repeatUnit, altWithContext) - repetitionsInRef;

    final int[] repetitionCount = { refCount, altCount };
    return new Pair<int[], byte[]>(repetitionCount, repeatUnit);
}
/**
 * Finds the length of the shortest prefix whose back-to-back copies make up the whole input.
 * For example ACTACT is two copies of ACT, so the answer is 3, whereas ACTACA cannot be
 * decomposed and the answer is its full length, 6.
 *
 * @param bases String to be tested
 * @return Length of the repeat unit; the length of the input if no shorter unit tiles it.
 *         An empty input yields 1.
 */
public static int findRepeatedSubstring(byte[] bases) {
    final int totalLength = bases.length;
    int unitLength = 1;

    while ( unitLength <= totalLength ) {
        boolean tilesInput = true;

        // compare each following unit-sized window against the leading unit
        for ( int windowStart = unitLength; tilesInput && windowStart < totalLength; windowStart += unitLength ) {
            for ( int offset = 0; offset < unitLength; offset++ ) {
                final int position = windowStart + offset;
                // positions beyond the end of the input compare as a zero byte
                final byte observed = position < totalLength ? bases[position] : 0;
                if ( observed != bases[offset] ) {
                    tilesInput = false;
                    break;
                }
            }
        }

        if ( tilesInput )
            return unitLength;
        unitLength++;
    }

    return unitLength;
}
/**
 * Helper routine that counts how many consecutive copies of a repeat unit a string starts with.
 * For example, for string ATAT and repeat unit AT the answer is 2; for ATATCG it is also 2,
 * because counting stops at the first unit-sized window that differs from the repeat unit.
 *
 * @param repeatUnit Substring
 * @param testString String to test
 * @return Number of leading repetitions of repeatUnit in testString; 0 if testString does not
 *         start with repeatUnit, if testString is empty, or if repeatUnit is empty
 */
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
    // An empty unit would never advance the scan below, so report "no repetitions"
    // rather than looping forever.
    if (repeatUnit.length == 0)
        return 0;

    int numRepeats = 0;
    for (int start = 0; start < testString.length; start += repeatUnit.length) {
        final int end = start + repeatUnit.length;
        // copyOfRange pads with zero bytes when end runs past the end of testString
        final byte[] unit = Arrays.copyOfRange(testString, start, end);
        if (!Arrays.equals(unit, repeatUnit))
            break;
        numRepeats++;
    }
    return numRepeats;
}
/**
 * Helper function for isTandemRepeat that checks whether the bases distinguishing the two
 * alleles are repeated at the start of the reference context.
 *
 * One allele must be a prefix of the other. A deletion (ref longer than alt) requires two
 * matches in the reference context; anything else is handled as an insertion and requires one.
 *
 * @param ref                            the reference allele
 * @param alt                            the alternate allele
 * @param refBasesStartingAtVCWithoutPad reference bases starting at the VC, without the padding base
 * @return true if the alleles are prefix-related and the extra bases repeat as required
 */
protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) {
    if ( Allele.oneIsPrefixOfOther(ref, alt) ) {
        final boolean isDeletion = ref.length() > alt.length();
        final String longer = isDeletion ? ref.getBaseString() : alt.getBaseString();
        final String shorter = isDeletion ? alt.getBaseString() : ref.getBaseString();
        final int requiredMatches = isDeletion ? 2 : 1;
        return basesAreRepeated(longer, shorter, refBasesStartingAtVCWithoutPad, requiredMatches);
    }

    // we require one allele be a prefix of another
    return false;
}
/**
 * Checks that the part of {@code l} following its first {@code s.length()} characters occurs
 * back to back at the start of {@code ref} at least {@code minNumberOfMatches} times.
 *
 * @param l                  the longer string
 * @param s                  the shorter string; only its length is used, to decide how many
 *                           leading characters of {@code l} to skip
 * @param ref                the string that must start with the repeated copies
 * @param minNumberOfMatches how many consecutive copies are required
 * @return true if all required copies are present; false on the first mismatch or when
 *         {@code ref} is too short to hold the next copy
 */
protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) {
    final String repeatCandidate = l.substring(s.length()); // skip s bases
    final int candidateLength = repeatCandidate.length();

    int matched = 0;
    while ( matched < minNumberOfMatches ) {
        // startsWith fails both when we ran out of bases and when the bases differ
        if ( ! ref.startsWith(repeatCandidate, matched * candidateLength) )
            return false;
        matched++;
    }

    return true; // every required copy was found
}
/**
 * Assigns genotypes (GTs) to the samples in the Variant Context greedily based on the PLs,
 * by delegating to {@link #subsetDiploidAlleles} with the full allele list of the context
 * and genotype assignment switched on.
 *
 * @param vc variant context with genotype likelihoods
 * @return genotypes context
 */
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
    final List<Allele> everyAllele = vc.getAlleles();
    return subsetDiploidAlleles(vc, everyAllele, true);
}
/**
 * Splits a variant context into its biallelic components if there are more than 2 alleles.
 *
 * For a VC with A/B/C alleles, this returns A/B and A/C contexts.
 * Genotypes are all no-calls now (it's not possible to fix them easily).
 * Alleles are right trimmed to satisfy VCF conventions.
 *
 * If vc is biallelic or non-variant, a single-element list holding vc itself is returned.
 *
 * Chromosome counts are updated (but they are by definition 0).
 *
 * @param vc a potentially multi-allelic variant context
 * @return a list of bi-allelic (or monomorphic) variant contexts
 */
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
    final boolean needsSplitting = vc.isVariant() && ! vc.isBiallelic();
    if ( ! needsSplitting ) {
        // non variant or biallelics already satisfy the contract
        return Collections.singletonList(vc);
    }

    final List<VariantContext> pieces = new LinkedList<VariantContext>();
    for ( final Allele alt : vc.getAlternateAlleles() ) {
        final VariantContextBuilder builder = new VariantContextBuilder(vc);
        final List<Allele> refAndAlt = Arrays.asList(vc.getReference(), alt);
        builder.alleles(refAndAlt);
        builder.genotypes(subsetDiploidAlleles(vc, refAndAlt, false));
        calculateChromosomeCounts(builder, true);

        final VariantContext untrimmed = builder.make();
        pieces.add(reverseTrimAlleles(untrimmed));
    }
    return pieces;
}
/**
 * Subsets the Variant Context to the specific set of alleles passed in, pruning the PLs
 * appropriately.
 *
 * Samples are processed in the order given by the genotypes context's name-sorted sample list.
 * A sample without likelihoods, or whose (new) likelihoods sum to more than
 * SUM_GL_THRESH_NOCALL, gets a fresh no-call genotype.
 *
 * @param vc              variant context with genotype likelihoods
 * @param allelesToUse    which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
 * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
 * @return genotypes
 */
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
                                                    final List<Allele> allelesToUse,
                                                    final boolean assignGenotypes) {
    final GenotypesContext oldGTs = vc.getGenotypes();                          // the genotypes with PLs
    final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();   // samples
    final GenotypesContext newGTs = GenotypesContext.create();                  // the new genotypes to create

    // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
    final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
    final int numNewAltAlleles = allelesToUse.size() - 1;

    // which PLs should be carried forward? null means "keep the PLs as they are", which is the
    // case when all of the alt alleles (or none, for a ref call) are being used
    ArrayList<Integer> likelihoodIndexesToUse = null;
    final boolean mustPrunePLs = numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0;
    if ( mustPrunePLs ) {
        likelihoodIndexesToUse = new ArrayList<Integer>(30);

        final boolean[] keepAltAllele = new boolean[numOriginalAltAlleles];
        for ( int altIndex = 0; altIndex < numOriginalAltAlleles; altIndex++ ) {
            if ( allelesToUse.contains(vc.getAlternateAllele(altIndex)) )
                keepAltAllele[altIndex] = true;
        }

        // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
        final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
        for ( int plIndex = 0; plIndex < numLikelihoods; plIndex++ ) {
            final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair pair = GenotypeLikelihoods.getAllelePair(plIndex);
            // consider this entry only if both of the alleles are good (index 0 is always accepted)
            if ( pair.alleleIndex1 != 0 && ! keepAltAllele[pair.alleleIndex1 - 1] )
                continue;
            if ( pair.alleleIndex2 != 0 && ! keepAltAllele[pair.alleleIndex2 - 1] )
                continue;
            likelihoodIndexesToUse.add(plIndex);
        }
    }

    // create the new genotypes
    final int numSamples = oldGTs.size();
    for ( int sample = 0; sample < numSamples; sample++ ) {
        final Genotype g = oldGTs.get(sampleIndices.get(sample));

        if ( ! g.hasLikelihoods() ) {
            newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
            continue;
        }

        // create the new likelihoods array from the alleles we are allowed to use
        final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
        final double[] newLikelihoods;
        if ( likelihoodIndexesToUse == null ) {
            newLikelihoods = originalLikelihoods;
        } else {
            final double[] subset = new double[likelihoodIndexesToUse.size()];
            int next = 0;
            for ( final int oldIndex : likelihoodIndexesToUse )
                subset[next++] = originalLikelihoods[oldIndex];
            // might need to re-normalize
            newLikelihoods = GeneralUtils.normalizeFromLog10(subset, false, true);
        }

        // if there is no mass on the (new) likelihoods, then just no-call the sample
        if ( GeneralUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
            newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
            continue;
        }

        final GenotypeBuilder gb = new GenotypeBuilder(g);
        if ( numNewAltAlleles != 0 )
            gb.PL(newLikelihoods);
        else
            gb.noPL();

        // NOTE(review): the second threshold check below looks redundant, since we only get here
        // when the sum is not above the threshold -- confirm that gb.PL() leaves the array
        // untouched before removing it
        if ( assignGenotypes && ! (GeneralUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL) ) {
            // find the genotype with maximum likelihoods
            final int bestPLindex = numNewAltAlleles == 0 ? 0 : GeneralUtils.maxElementIndex(newLikelihoods);
            final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair best = GenotypeLikelihoods.getAllelePair(bestPLindex);

            gb.alleles(Arrays.asList(allelesToUse.get(best.alleleIndex1), allelesToUse.get(best.alleleIndex2)));
            if ( numNewAltAlleles != 0 )
                gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(bestPLindex, newLikelihoods));
        } else {
            // if we weren't asked to assign a genotype, then just no-call the sample
            gb.alleles(NO_CALL_ALLELES);
        }

        newGTs.add(gb.make());
    }

    return newGTs;
}
/**
 * Trims the trailing bases shared by the alleles of a variant context.
 *
 * The number of bases to trim comes from {@link #computeReverseClipping} called with the
 * context's alleles, the bytes of the reference allele's display string, no forward
 * clipping, and full clipping disallowed.
 *
 * @param inputVC the variant context whose alleles should be trimmed
 * @return {@code inputVC} itself when the trim extent is not positive or the context has at
 *         most one allele; otherwise a new context carrying the trimmed alleles, genotypes
 *         re-expressed in terms of those alleles, and an adjusted stop position
 */
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
    final List<Allele> untrimmed = inputVC.getAlleles();
    final byte[] refDisplayBases = inputVC.getReference().getDisplayString().getBytes();
    final int numBasesToTrim = computeReverseClipping(untrimmed, refDisplayBases, 0, false);

    final boolean nothingToDo = numBasesToTrim <= 0 || untrimmed.size() <= 1;
    if ( nothingToDo )
        return inputVC;

    // build the trimmed allele list and remember which trimmed allele replaces each original
    final List<Allele> alleles = new ArrayList<Allele>();
    final Map<Allele, Allele> replacementFor = new HashMap<Allele, Allele>();
    for ( final Allele original : untrimmed ) {
        final Allele replacement;
        if ( original.isSymbolic() ) {
            // symbolic alleles are carried over untouched
            replacement = original;
        } else {
            final byte[] keptBases = Arrays.copyOfRange(original.getBases(), 0, original.length() - numBasesToTrim);
            replacement = Allele.create(keptBases, original.isReference());
        }
        alleles.add(replacement);
        replacementFor.put(original, replacement);
    }

    // rebuild every genotype so that it points at the trimmed alleles
    final GenotypesContext genotypes = GenotypesContext.create();
    for ( final Genotype genotype : inputVC.getGenotypes() ) {
        final List<Allele> remapped = new ArrayList<Allele>();
        for ( final Allele called : genotype.getAlleles() ) {
            // anything that is not a called allele becomes an explicit no-call
            remapped.add(called.isCalled() ? replacementFor.get(called) : Allele.NO_CALL);
        }
        genotypes.add(new GenotypeBuilder(genotype).alleles(remapped).make());
    }

    // the new stop is derived from the length of the first trimmed allele
    // (presumably the reference allele -- confirm against VariantContext's allele ordering)
    final int newStop = inputVC.getStart() + alleles.get(0).length() - 1;
    final VariantContextBuilder builder = new VariantContextBuilder(inputVC).stop(newStop);
    return builder.alleles(alleles).genotypes(genotypes).make();
}
/**
 * Counts how many trailing bases of the non-symbolic alleles match the trailing bases of
 * {@code ref}, one trailing position per pass over the alleles. Symbolic alleles are ignored.
 *
 * @param unclippedAlleles the alleles to examine
 * @param ref              bases whose tail is compared against the tail of each allele
 * @param forwardClipping  the scan stops once an allele's remaining length (its length minus the
 *                         current clipping) is no greater than this value
 * @param allowFullClip    whether an allele, or {@code ref}, may be consumed completely
 * @return the number of trailing bases that can be clipped. When some allele would be consumed
 *         entirely, the result is that allele's length if {@code allowFullClip} is set and one
 *         less otherwise. Returns -1 when {@code ref} is exhausted and {@code allowFullClip} is
 *         not set, and 0 when there is no non-symbolic allele to examine.
 */
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
                                         final byte[] ref,
                                         final int forwardClipping,
                                         final boolean allowFullClip) {
    int clipping = 0;
    boolean stillClipping = true;

    while ( stillClipping ) {
        boolean sawConcreteAllele = false;

        for ( final Allele a : unclippedAlleles ) {
            if ( a.isSymbolic() )
                continue;
            sawConcreteAllele = true;

            // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
            // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
            if ( a.length() - clipping == 0 )
                return clipping - (allowFullClip ? 0 : 1);

            if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
                stillClipping = false;
            }
            else if ( ref.length == clipping ) {
                // the reference has no bases left to compare against
                if ( allowFullClip )
                    stillClipping = false;
                else
                    return -1;
            }
            else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
                // this allele disagrees with the reference at the current trailing position
                stillClipping = false;
            }
        }

        // Without at least one non-symbolic allele nothing above can ever stop the scan,
        // so bail out instead of incrementing the clipping forever.
        if ( ! sawConcreteAllele )
            return clipping;

        if ( stillClipping )
            clipping++;
    }

    return clipping;
}
/**
* A simple but common wrapper for matching VariantContext objects using JEXL expressions
*/
@ -602,9 +1028,10 @@ public class VariantContextUtils {
if (vc.alleles.size() == 1)
continue;
if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) {
if ( ! genotypes.isEmpty() )
logger.debug(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s",
vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.alleles));
if ( GeneralUtils.DEBUG_MODE_ENABLED && ! genotypes.isEmpty() ) {
System.err.println(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s",
vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.alleles));
}
genotypes = stripPLsAndAD(genotypes);
// this will remove stale AC,AF attributes from vc
calculateChromosomeCounts(vc, attributes, true);
@ -635,7 +1062,7 @@ public class VariantContextUtils {
for ( final VariantContext vc : VCs )
if ( vc.isVariant() )
s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
setValue = Utils.join("-", s);
setValue = GeneralUtils.join("-", s);
}
if ( setKey != null ) {
@ -649,7 +1076,7 @@ public class VariantContextUtils {
if ( depth > 0 )
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs);
final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : GeneralUtils.join(",", rsIDs);
final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID);
builder.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd());

View File

@ -27,7 +27,7 @@ package org.broadinstitute.variant.variantcontext;
import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.MapContext;
import org.broadinstitute.variant.utils.Utils;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.VCFConstants;
import java.util.Collection;
@ -168,7 +168,7 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual()));
// add alleles
infoMap.put("ALLELES", Utils.join(";", vc.getAlleles()));
infoMap.put("ALLELES", GeneralUtils.join(";", vc.getAlleles()));
infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles()));
// add attributes

View File

@ -27,7 +27,7 @@ package org.broadinstitute.variant.variantcontext.writer;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.*;
import java.util.HashMap;
@ -40,7 +40,6 @@ import java.util.Map;
* @since 06/12
*/
public class BCF2FieldWriterManager {
final protected static Logger logger = Logger.getLogger(BCF2FieldWriterManager.class);
final Map<String, BCF2FieldWriter.SiteWriter> siteWriters = new HashMap<String, BCF2FieldWriter.SiteWriter>();
final Map<String, BCF2FieldWriter.GenotypesWriter> genotypesWriters = new HashMap<String, BCF2FieldWriter.GenotypesWriter>();
final IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
@ -98,8 +97,8 @@ public class BCF2FieldWriterManager {
final boolean createGenotypesEncoders ) {
if ( createGenotypesEncoders && intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
if ( line.getType() != VCFHeaderLineType.Integer )
logger.warn("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
if ( GeneralUtils.DEBUG_MODE_ENABLED && line.getType() != VCFHeaderLineType.Integer )
System.err.println("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
return new BCF2FieldEncoder.IntArray(line, dict);
} else if ( createGenotypesEncoders && line.getID().equals(VCFConstants.GENOTYPE_KEY) ) {
return new BCF2FieldEncoder.GenericInts(line, dict);

View File

@ -28,11 +28,11 @@ package org.broadinstitute.variant.variantcontext.writer;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broadinstitute.variant.bcf2.BCF2Codec;
import org.broadinstitute.variant.bcf2.BCF2Type;
import org.broadinstitute.variant.bcf2.BCF2Utils;
import org.broadinstitute.variant.bcf2.BCFVersion;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.variant.vcf.VCFContigHeaderLine;
import org.broadinstitute.variant.vcf.VCFHeader;
@ -89,7 +89,6 @@ class BCF2Writer extends IndexingVariantContextWriter {
public static final int MAJOR_VERSION = 2;
public static final int MINOR_VERSION = 1;
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
@ -129,7 +128,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
// create the config offsets map
if ( header.getContigLines().isEmpty() ) {
if ( ALLOW_MISSING_CONTIG_LINES ) {
logger.warn("No contig dictionary found in header, falling back to reference sequence dictionary");
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
System.err.println("No contig dictionary found in header, falling back to reference sequence dictionary");
}
createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null));
} else {
throw new IllegalStateException("Cannot write BCF2 file with missing contig lines");
@ -275,10 +276,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData &&
canSafelyWriteRawGenotypesBytes((BCF2Codec.LazyData) lgc.getUnparsedGenotypeData())) {
//logger.info("Passing on raw BCF2 genotypes data");
return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
} else {
//logger.info("Decoding raw BCF2 genotypes data");
lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long
}
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.variant.vcf;
import org.apache.log4j.Logger;
import org.broad.tribble.AsciiFeatureCodec;
import org.broad.tribble.Feature;
import org.broad.tribble.NameAwareCodec;
@ -33,6 +32,7 @@ import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.BlockCompressedInputStream;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.variantcontext.*;
import java.io.FileInputStream;
@ -46,7 +46,6 @@ import java.util.zip.GZIPInputStream;
public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
protected final static Logger log = Logger.getLogger(AbstractVCFCodec.class);
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
// we have to store the list of strings that make up the header until they're needed
@ -397,9 +396,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
key = infoFieldArray[i];
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
if ( headerLine != null && headerLine.getType() != VCFHeaderLineType.Flag ) {
if ( ! warnedAboutNoEqualsForNonFlag ) {
log.warn("Found info key " + key + " without a = value, but the header says the field is of type "
+ headerLine.getType() + " but this construct is only value for FLAG type fields");
if ( GeneralUtils.DEBUG_MODE_ENABLED && ! warnedAboutNoEqualsForNonFlag ) {
System.err.println("Found info key " + key + " without a = value, but the header says the field is of type "
+ headerLine.getType() + " but this construct is only value for FLAG type fields");
warnedAboutNoEqualsForNonFlag = true;
}
@ -517,8 +516,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
if ( allele == null || allele.length() == 0 )
generateException("Empty alleles are not permitted in VCF records", lineNo);
if ( MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING )
log.warn(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
if ( GeneralUtils.DEBUG_MODE_ENABLED && MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING ) {
System.err.println(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
}
if ( isSymbolicAllele(allele) ) {
if ( isRef ) {

View File

@ -25,8 +25,8 @@
package org.broadinstitute.variant.vcf;
import org.apache.log4j.Logger;
import org.broad.tribble.TribbleException;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.variant.variantcontext.VariantContext;
@ -38,7 +38,6 @@ import java.util.Map;
* a base class for compound header lines, which include info lines and format lines (so far)
*/
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine {
final protected static Logger logger = Logger.getLogger(VCFHeader.class);
public enum SupportedHeaderLineType {
INFO(true), FORMAT(false);
@ -197,7 +196,9 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
if ( type == VCFHeaderLineType.Flag && count != 0 ) {
count = 0;
logger.warn("FLAG fields must have a count value of 0, but saw " + count + " for header line " + getID() + ". Changing it to 0 inside the code");
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
System.err.println("FLAG fields must have a count value of 0, but saw " + count + " for header line " + getID() + ". Changing it to 0 inside the code");
}
}
}

View File

@ -25,9 +25,9 @@
package org.broadinstitute.variant.vcf;
import org.apache.log4j.Logger;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.variant.utils.GeneralUtils;
import java.util.*;
@ -45,7 +45,6 @@ import java.util.*;
* A class representing the VCF header
*/
public class VCFHeader {
final protected static Logger logger = Logger.getLogger(VCFHeader.class);
// the mandatory header fields
public enum HEADER_FIELDS {
@ -238,9 +237,11 @@ public class VCFHeader {
}
if ( hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && ! hasFormatLine(VCFConstants.GENOTYPE_PL_KEY) ) {
logger.warn("Found " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY + " format, but no "
+ VCFConstants.GENOTYPE_PL_KEY + " field. We now only manage PL fields internally"
+ " automatically adding a corresponding PL field to your VCF header");
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
System.err.println("Found " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY + " format, but no "
+ VCFConstants.GENOTYPE_PL_KEY + " field. We now only manage PL fields internally"
+ " automatically adding a corresponding PL field to your VCF header");
}
addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
}
}
@ -254,10 +255,14 @@ public class VCFHeader {
*/
private final <T extends VCFCompoundHeaderLine> void addMetaDataMapBinding(final Map<String, T> map, T line) {
final String key = line.getID();
if ( map.containsKey(key) )
logger.debug("Found duplicate VCF header lines for " + key + "; keeping the first only" );
else
if ( map.containsKey(key) ) {
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
System.err.println("Found duplicate VCF header lines for " + key + "; keeping the first only" );
}
}
else {
map.put(key, line);
}
}
/**

View File

@ -27,8 +27,8 @@ package org.broadinstitute.variant.vcf;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broad.tribble.TribbleException;
import org.broadinstitute.variant.utils.GeneralUtils;
import java.util.*;
@ -46,7 +46,6 @@ public class VCFStandardHeaderLines {
* Enabling this causes us to repair header lines even if only their descriptions differ
*/
private final static boolean REPAIR_BAD_DESCRIPTIONS = false;
protected final static Logger logger = Logger.getLogger(VCFStandardHeaderLines.class);
private static Standards<VCFFormatHeaderLine> formatStandards = new Standards<VCFFormatHeaderLine>();
private static Standards<VCFInfoHeaderLine> infoStandards = new Standards<VCFInfoHeaderLine>();
@ -216,11 +215,13 @@ public class VCFStandardHeaderLines {
final boolean needsRepair = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc);
if ( needsRepair ) {
logger.warn("Repairing standard header line for field " + line.getID() + " because"
+ (badCountType ? " -- count types disagree; header has " + line.getCountType() + " but standard is " + standard.getCountType() : "")
+ (badType ? " -- type disagree; header has " + line.getType() + " but standard is " + standard.getType() : "")
+ (badCount ? " -- counts disagree; header has " + line.getCount() + " but standard is " + standard.getCount() : "")
+ (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": ""));
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
System.err.println("Repairing standard header line for field " + line.getID() + " because"
+ (badCountType ? " -- count types disagree; header has " + line.getCountType() + " but standard is " + standard.getCountType() : "")
+ (badType ? " -- type disagree; header has " + line.getType() + " but standard is " + standard.getType() : "")
+ (badCount ? " -- counts disagree; header has " + line.getCount() + " but standard is " + standard.getCount() : "")
+ (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": ""));
}
return standard;
} else
return line;

View File

@ -28,17 +28,22 @@ package org.broadinstitute.variant.vcf;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.commons.io.FilenameUtils;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.*;
public class VCFUtils {
public static Set<VCFHeaderLine> smartMergeHeaders(Collection<VCFHeader> headers, Logger logger) throws IllegalStateException {
public static Set<VCFHeaderLine> smartMergeHeaders(Collection<VCFHeader> headers, boolean emitWarnings) throws IllegalStateException {
HashMap<String, VCFHeaderLine> map = new HashMap<String, VCFHeaderLine>(); // from KEY.NAME -> line
HeaderConflictWarner conflictWarner = new HeaderConflictWarner(logger);
HeaderConflictWarner conflictWarner = new HeaderConflictWarner(emitWarnings);
// todo -- needs to remove all version headers from sources and add its own VCF version line
for ( VCFHeader source : headers ) {
@ -193,19 +198,48 @@ public class VCFUtils {
return assembly;
}
/** Only displays a warning if a logger is provided and an identical warning hasn't been already issued */
/**
 * Read all of the VCF records from source into memory, returning the header and the VariantContexts
 *
 * The file is opened twice: once to parse the header, and once more to decode the records after
 * skipping over the header bytes. Both streams are closed before this method returns, including
 * when reading or decoding throws.
 *
 * @param source the file to read, must be in VCF4 format
 * @return a pair holding the parsed header and every non-null record decoded from source, in file order
 * @throws java.io.IOException if source cannot be opened or read
 */
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
    final VCFCodec codec = new VCFCodec();

    // first pass: parse only the header, which also tells us where the records begin
    final FeatureCodecHeader header;
    final PositionalBufferedStream headerStream = new PositionalBufferedStream(new FileInputStream(source));
    try {
        header = codec.readHeader(headerStream);
    } finally {
        headerStream.close();
    }
    final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();

    // second pass: reopen the file, jump past the header and read in the features
    final List<VariantContext> vcs = new ArrayList<VariantContext>();
    final PositionalBufferedStream recordStream = new PositionalBufferedStream(new FileInputStream(source));
    try {
        recordStream.skip(header.getHeaderEnd());
        while ( ! recordStream.isDone() ) {
            final VariantContext vc = codec.decode(recordStream);
            if ( vc != null )
                vcs.add(vc);
        }
    } finally {
        // previously this stream was never closed, leaking a file handle per call
        recordStream.close();
    }

    return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
}
/** Only displays a warning if warnings are enabled and an identical warning hasn't been already issued */
private static final class HeaderConflictWarner {
Logger logger;
boolean emitWarnings;
Set<String> alreadyIssued = new HashSet<String>();
private HeaderConflictWarner(final Logger logger) {
this.logger = logger;
private HeaderConflictWarner( final boolean emitWarnings ) {
this.emitWarnings = emitWarnings;
}
public void warn(final VCFHeaderLine line, final String msg) {
if ( logger != null && ! alreadyIssued.contains(line.getKey()) ) {
if ( GeneralUtils.DEBUG_MODE_ENABLED && emitWarnings && ! alreadyIssued.contains(line.getKey()) ) {
alreadyIssued.add(line.getKey());
logger.warn(msg);
System.err.println(msg);
}
}
}

View File

@ -35,8 +35,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.bcf2.BCF2Utils;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.vcf.VCFCodec;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.variant.variantcontext.VariantContextTestProvider;

View File

@ -25,11 +25,9 @@
package org.broadinstitute.sting.utils;
import cern.jet.math.Arithmetic;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.jgrapht.alg.StrongConnectivityInspector;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import org.testng.Assert;

View File

@ -35,8 +35,8 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextTestProvider;
import org.broadinstitute.variant.vcf.VCFCodec;

View File

@ -23,10 +23,9 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.variant.vcf;
package org.broadinstitute.sting.utils.variant;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import java.io.File;

View File

@ -23,13 +23,13 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.variant.variantcontext;
package org.broadinstitute.sting.utils.variant;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFCodec;
import java.util.ArrayList;
@ -73,8 +73,6 @@ public class VariantContextBenchmark extends SimpleBenchmark {
MERGE
}
private GenomeLocParser b37GenomeLocParser;
@Override protected void setUp() {
// TODO -- update for new tribble interface
// try {

View File

@ -0,0 +1,141 @@
package org.broadinstitute.variant;
import org.testng.Assert;
import java.io.File;
import java.io.IOException;
import java.util.*;
/**
 * Base class for test classes within org.broadinstitute.variant
 *
 * Supplies shared reference and test-data paths, a small registry for building TestNG data
 * providers, temp-file creation, and tolerant assertions for doubles and sets.
 */
public class VariantBaseTest {
    public static final String hg19Reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
    public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";

    // TODO: change this to an appropriate value once the move to the Picard repo takes place
    public static final String variantTestDataRoot = new File("private/testdata/").getAbsolutePath() + "/";

    /**
     * Simple generic utility class for creating TestNG data providers:
     *
     * 1: inherit this class, as in
     *
     * <pre>
     *      private class SummarizeDifferenceTest extends TestDataProvider {
     *         public SummarizeDifferenceTest() {
     *           super(SummarizeDifferenceTest.class);
     *         }
     *         ...
     *      }
     * </pre>
     *
     * Provide a reference to your class to the TestDataProvider constructor.
     *
     * 2: Create instances of your subclass.  Return from it the call to getTests, providing
     * the class type of your test
     *
     * <pre>
     * {@literal @}DataProvider(name = "summaries")
     * public Object[][] createSummaries() {
     *     new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2");
     *     new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1");
     *     return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class);
     * }
     * </pre>
     *
     * Every instance registers itself, at construction time, in a static map keyed by the class
     * passed to the constructor; getTests returns everything registered under a given class.
     */
    public static class TestDataProvider {
        // registry of every provider created so far, keyed by the class given to the constructor
        private static final Map<Class<?>, List<Object>> tests = new HashMap<Class<?>, List<Object>>();
        protected String name;

        /**
         * Create a new TestDataProvider instance bound to the class variable C
         * @param c    the class under which this instance is registered
         * @param name a label for this provider, shown by toString
         */
        public TestDataProvider(Class<?> c, String name) {
            if ( ! tests.containsKey(c) )
                tests.put(c, new ArrayList<Object>());
            tests.get(c).add(this);
            this.name = name;
        }

        /**
         * Create a new TestDataProvider instance bound to the class variable C, with an empty name
         * @param c the class under which this instance is registered
         */
        public TestDataProvider(Class<?> c) {
            this(c, "");
        }

        public void setName(final String name) {
            this.name = name;
        }

        /**
         * Return all of the data providers in the form expected by TestNG of type class C
         * @param c the class whose registered providers are wanted; at least one provider must
         *          already have been constructed for it, otherwise a NullPointerException results
         * @return one single-element row per registered provider, in creation order
         */
        public static Object[][] getTests(Class<?> c) {
            List<Object[]> params2 = new ArrayList<Object[]>();
            for ( Object x : tests.get(c) ) params2.add(new Object[]{x});
            return params2.toArray(new Object[][]{});
        }

        @Override
        public String toString() {
            return "TestDataProvider("+name+")";
        }
    }

    /**
     * Creates a temp file that will be deleted on exit after tests are complete.
     * @param name Prefix of the file.
     * @param extension Extension to concat to the end of the file.
     * @return A file in the temporary directory starting with name, ending with extension, which will be deleted after the program exits.
     */
    public static File createTempFile(String name, String extension) {
        try {
            File file = File.createTempFile(name, extension);
            file.deleteOnExit();
            return file;
        } catch (IOException ex) {
            throw new RuntimeException("Cannot create temp file: " + ex.getMessage(), ex);
        }
    }

    private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;

    /**
     * Assert that actual is a Double and is close to expected, using the default tolerance.
     */
    public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
        Assert.assertTrue(actual instanceof Double, "Not a double");
        assertEqualsDoubleSmart((double)(Double)actual, (double)expected);
    }

    /**
     * Assert that actual is a Double and is close to expected, using the given tolerance.
     */
    public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) {
        Assert.assertTrue(actual instanceof Double, "Not a double");
        assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance);
    }

    /**
     * Assert that actual is close to expected, using the default tolerance.
     */
    public static final void assertEqualsDoubleSmart(final double actual, final double expected) {
        assertEqualsDoubleSmart(actual, expected, DEFAULT_FLOAT_TOLERANCE);
    }

    /**
     * Assert that two sets hold the same elements.
     * @param info message reported when the sets differ
     */
    public static final <T> void assertEqualsSet(final Set<T> actual, final Set<T> expected, final String info) {
        final Set<T> actualSet = new HashSet<T>(actual);
        final Set<T> expectedSet = new HashSet<T>(expected);
        Assert.assertTrue(actualSet.equals(expectedSet), info); // note this is necessary due to testng bug for set comps
    }

    /**
     * Assert that actual is close to expected, using the given tolerance and no extra message.
     */
    public static void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
        assertEqualsDoubleSmart(actual, expected, tolerance, null);
    }

    /**
     * Assert that actual is close to expected.
     *
     * A NaN expected value requires a NaN actual value, and an infinite expected value requires
     * the same infinity (same sign). Otherwise the assertion passes when either the absolute
     * difference or the relative difference is below tolerance.
     *
     * @param actual    the value produced by the code under test
     * @param expected  the value it should be close to
     * @param tolerance upper bound (exclusive) on the absolute or the relative difference
     * @param message   optional extra text appended to the failure message; may be null
     */
    public static void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance, final String message) {
        final String suffix = message == null ? "" : " message: " + message;
        if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
            Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not" + suffix);
        else if ( Double.isInfinite(expected) ) // Inf - Inf and Inf / Inf are NaN, so compare directly; this also checks the sign
            Assert.assertTrue(actual == expected, "expected is " + expected + ", actual is " + actual + suffix);
        else {
            final double delta = Math.abs(actual - expected);
            final double ratio = Math.abs(actual / expected - 1.0);
            Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual
                    + " not within tolerance " + tolerance
                    + suffix);
        }
    }
}

View File

@ -25,12 +25,9 @@
package org.broadinstitute.variant.bcf2;
// the imports for unit testing.
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.variantcontext.writer.BCF2Encoder;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
@ -47,7 +44,7 @@ import java.util.Collections;
import java.util.List;
public class BCF2EncoderDecoderUnitTest extends BaseTest {
public class BCF2EncoderDecoderUnitTest extends VariantBaseTest {
private final double FLOAT_TOLERANCE = 1e-6;
final List<BCF2TypedValue> primitives = new ArrayList<BCF2TypedValue>();
final List<BCF2TypedValue> basicTypes = new ArrayList<BCF2TypedValue>();
@ -561,7 +558,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
final double valueFloat = (Double)tv.value;
final double decodedFloat = (Double)decoded;
BaseTest.assertEqualsDoubleSmart(decodedFloat, valueFloat, FLOAT_TOLERANCE);
VariantBaseTest.assertEqualsDoubleSmart(decodedFloat, valueFloat, FLOAT_TOLERANCE);
} else
Assert.assertEquals(decoded, tv.value);
}

View File

@ -25,8 +25,8 @@
package org.broadinstitute.variant.bcf2;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.*;
import java.util.*;
@ -37,7 +37,7 @@ import org.testng.annotations.Test;
/**
* Tests for BCF2Utils
*/
public final class BCF2UtilsUnitTest extends BaseTest {
public final class BCF2UtilsUnitTest extends VariantBaseTest {
@DataProvider(name = "CollapseExpandTest")
public Object[][] makeCollapseExpandTest() {
List<Object[]> tests = new ArrayList<Object[]>();
@ -87,7 +87,7 @@ public final class BCF2UtilsUnitTest extends BaseTest {
final List<VCFHeaderLine> empty = Collections.emptyList();
final List<List<VCFHeaderLine>> permutations = extrasToTake == 0
? Collections.singletonList(empty)
: Utils.makePermutations(extraLines, extrasToTake, false);
: GeneralUtils.makePermutations(extraLines, extrasToTake, false);
for ( final List<VCFHeaderLine> permutation : permutations ) {
for ( int i = -1; i < inputLines.size(); i++ ) {
final List<VCFHeaderLine> allLines = new ArrayList<VCFHeaderLine>(inputLines);
@ -113,7 +113,7 @@ public final class BCF2UtilsUnitTest extends BaseTest {
final List<List<String>> permutations = testSamples.isEmpty()
? Collections.singletonList(testSamples)
: Utils.makePermutations(testSamples, testSamples.size(), false);
: GeneralUtils.makePermutations(testSamples, testSamples.size(), false);
for ( final List<String> testSamplesPermutation : permutations ) {
final VCFHeader testHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), testSamplesPermutation);
final boolean expectedConsistent = testSamples.equals(inSamples);

View File

@ -25,21 +25,18 @@
package org.broadinstitute.variant.utils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.VariantBaseTest;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.Test;
import org.testng.annotations.BeforeClass;
public class BaseUtilsUnitTest extends BaseTest {
public class BaseUtilsUnitTest extends VariantBaseTest {
@BeforeClass
public void init() { }
@Test
public void testMostFrequentBaseFraction() {
logger.warn("Executing testMostFrequentBaseFraction");
compareFrequentBaseFractionToExpected("AAAAA", 1.0);
compareFrequentBaseFractionToExpected("ACCG", 0.5);
compareFrequentBaseFractionToExpected("ACCCCTTTTG", 4.0/10.0);
@ -47,7 +44,7 @@ public class BaseUtilsUnitTest extends BaseTest {
private void compareFrequentBaseFractionToExpected(String sequence, double expected) {
double fraction = BaseUtils.mostFrequentBaseFraction(sequence.getBytes());
Assert.assertTrue(MathUtils.compareDoubles(fraction, expected) == 0);
Assert.assertTrue(GeneralUtils.compareDoubles(fraction, expected) == 0);
}
@Test
@ -67,8 +64,6 @@ public class BaseUtilsUnitTest extends BaseTest {
@Test
public void testTransitionTransversion() {
logger.warn("Executing testTransitionTransversion");
Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'T' ) == BaseUtils.BaseSubstitutionType.TRANSVERSION );
Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'C' ) == BaseUtils.BaseSubstitutionType.TRANSVERSION );
Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'G' ) == BaseUtils.BaseSubstitutionType.TRANSITION );
@ -94,8 +89,6 @@ public class BaseUtilsUnitTest extends BaseTest {
@Test
public void testReverseComplementString() {
logger.warn("Executing testReverseComplementString");
compareRCStringToExpected("ACGGT", "ACCGT");
compareRCStringToExpected("TCGTATATCTCGCTATATATATATAGCTCTAGTATA", "TATACTAGAGCTATATATATATAGCGAGATATACGA");
compareRCStringToExpected("AAAN", "NTTT");

View File

@ -28,6 +28,7 @@ package org.broadinstitute.variant.variantcontext;
// the imports for unit testing.
import org.broadinstitute.variant.VariantBaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.Test;
@ -44,7 +45,7 @@ import org.testng.annotations.Test;
/**
* Basic unit test for Allele
*/
public class AlleleUnitTest {
public class AlleleUnitTest extends VariantBaseTest {
Allele ARef, A, T, ATIns, ATCIns, NoCall;
@BeforeSuite

View File

@ -30,9 +30,9 @@ package org.broadinstitute.variant.variantcontext;
import org.broad.tribble.TribbleException;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
@ -44,7 +44,7 @@ import java.util.List;
/**
* Basic unit test for Genotype likelihoods objects
*/
public class GenotypeLikelihoodsUnitTest {
public class GenotypeLikelihoodsUnitTest extends VariantBaseTest {
double [] v = new double[]{-10.5, -1.25, -5.11};
final static String vGLString = "-10.50,-1.25,-5.11";
final static String vPLString = "93,0,39";
@ -88,7 +88,7 @@ public class GenotypeLikelihoodsUnitTest {
//Linear scale
glMap = gl.getAsMap(true);
double [] vl = MathUtils.normalizeFromLog10(v);
double [] vl = GeneralUtils.normalizeFromLog10(v);
Assert.assertEquals(vl[GenotypeType.HOM_REF.ordinal()-1],glMap.get(GenotypeType.HOM_REF));
Assert.assertEquals(vl[GenotypeType.HET.ordinal()-1],glMap.get(GenotypeType.HET));
Assert.assertEquals(vl[GenotypeType.HOM_VAR.ordinal()-1],glMap.get(GenotypeType.HOM_VAR));
@ -118,7 +118,7 @@ public class GenotypeLikelihoodsUnitTest {
//GQ for the best guess genotype
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HET),-3.9);
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
double[] test = GeneralUtils.normalizeFromLog10(gl.getAsVector());
//GQ for the other genotypes
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_REF), Math.log10(1.0 - test[GenotypeType.HOM_REF.ordinal()-1]));

View File

@ -29,13 +29,13 @@ package org.broadinstitute.variant.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.variant.VariantBaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.Test;
public class GenotypeUnitTest extends BaseTest {
public class GenotypeUnitTest extends VariantBaseTest {
Allele A, Aref, T;
@BeforeSuite

View File

@ -30,8 +30,8 @@ package org.broadinstitute.variant.variantcontext;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
@ -40,7 +40,7 @@ import org.testng.annotations.Test;
import java.util.*;
public class GenotypesContextUnitTest extends BaseTest {
public class GenotypesContextUnitTest extends VariantBaseTest {
Allele Aref, C, T;
Genotype AA, AT, TT, AC, CT, CC, MISSING;
List<Genotype> allGenotypes;
@ -128,7 +128,7 @@ public class GenotypesContextUnitTest extends BaseTest {
// sorted
new GenotypesContextProvider(maker, samples);
// unsorted
new GenotypesContextProvider(maker, Utils.reverse(samples));
new GenotypesContextProvider(maker, GeneralUtils.reverse(samples));
}
}

View File

@ -25,15 +25,14 @@
package org.broadinstitute.variant.variantcontext;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.bcf2.BCF2Codec;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.variant.utils.Pair;
import org.broadinstitute.variant.variantcontext.writer.Options;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.testng.Assert;
@ -50,8 +49,6 @@ import java.util.*;
* @since Date created
*/
public class VariantContextTestProvider {
final protected static Logger logger = Logger.getLogger(VariantContextTestProvider.class);
final private static boolean ENABLE_GENOTYPE_TESTS = true;
final private static boolean ENABLE_A_AND_G_TESTS = true;
final private static boolean ENABLE_VARARRAY_TESTS = true;
@ -68,12 +65,12 @@ public class VariantContextTestProvider {
private final static List<File> testSourceVCFs = new ArrayList<File>();
static {
testSourceVCFs.add(new File(BaseTest.privateTestDir + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"));
testSourceVCFs.add(new File(BaseTest.privateTestDir + "ex2.vcf"));
testSourceVCFs.add(new File(BaseTest.privateTestDir + "dbsnp_135.b37.1000.vcf"));
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"));
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "ex2.vcf"));
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "dbsnp_135.b37.1000.vcf"));
if ( ENABLE_SYMBOLIC_ALLELE_TESTS ) {
testSourceVCFs.add(new File(BaseTest.privateTestDir + "diagnosis_targets_testfile.vcf"));
testSourceVCFs.add(new File(BaseTest.privateTestDir + "VQSR.mixedTest.recal"));
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "diagnosis_targets_testfile.vcf"));
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "VQSR.mixedTest.recal"));
}
}
@ -156,12 +153,10 @@ public class VariantContextTestProvider {
Pair<VCFHeader, Iterable<VariantContext>> x = readAllVCs( file, codec );
List<VariantContext> fullyDecoded = new ArrayList<VariantContext>();
logger.warn("Reading records from " + file);
for ( final VariantContext raw : x.getSecond() ) {
if ( raw != null )
fullyDecoded.add(raw.fullyDecode(x.getFirst(), false));
}
logger.warn("Done reading " + file);
TEST_DATAs.add(new VariantContextTestData(x.getFirst(), fullyDecoded));
}
@ -788,12 +783,12 @@ public class VariantContextTestProvider {
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered");
BaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());
VariantBaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
VariantBaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());
Assert.assertEquals(actual.hasGenotypes(), expected.hasGenotypes(), "hasGenotypes");
if ( expected.hasGenotypes() ) {
BaseTest.assertEqualsSet(actual.getSampleNames(), expected.getSampleNames(), "sample names set");
VariantBaseTest.assertEqualsSet(actual.getSampleNames(), expected.getSampleNames(), "sample names set");
Assert.assertEquals(actual.getSampleNamesOrderedByName(), expected.getSampleNamesOrderedByName(), "sample names");
final Set<String> samples = expected.getSampleNames();
for ( final String sample : samples ) {
@ -879,7 +874,7 @@ public class VariantContextTestProvider {
private static void assertAttributeEquals(final String key, final Object actual, final Object expected) {
if ( expected instanceof Double ) {
// must be very tolerant because doubles are being rounded to 2 sig figs
BaseTest.assertEqualsDoubleSmart(actual, (Double)expected, 1e-2);
VariantBaseTest.assertEqualsDoubleSmart(actual, (Double)expected, 1e-2);
} else
Assert.assertEquals(actual, expected, "Attribute " + key);
}
@ -935,7 +930,7 @@ public class VariantContextTestProvider {
}
private static List<List<Allele>> makeAllGenotypes(final List<Allele> alleles, final int highestPloidy) {
return Utils.makePermutations(alleles, highestPloidy, true);
return GeneralUtils.makePermutations(alleles, highestPloidy, true);
}
public static void assertEquals(final VCFHeader actual, final VCFHeader expected) {

View File

@ -29,9 +29,8 @@ package org.broadinstitute.variant.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.utils.Pair;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
@ -41,7 +40,7 @@ import org.testng.Assert;
import java.util.*;
public class VariantContextUnitTest extends BaseTest {
public class VariantContextUnitTest extends VariantBaseTest {
Allele A, Aref, C, T, Tref;
Allele del, delRef, ATC, ATCref;
@ -500,30 +499,30 @@ public class VariantContextUnitTest extends BaseTest {
Pair<List<Integer>,byte[]> result;
byte[] refBytes = "TATCATCATCGGA".getBytes();
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes()),4);
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes()),0);
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes()),1);
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes()),1);
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes()),2);
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes()),4);
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes()),0);
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes()),1);
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes()),1);
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes()),2);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4);
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4);
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],4);
Assert.assertEquals(result.getSecond().length,3);
// ATC*,A,ATCATC
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],3);
Assert.assertEquals(result.getFirst().toArray()[1],2);
Assert.assertEquals(result.getFirst().toArray()[2],4);
@ -532,7 +531,7 @@ public class VariantContextUnitTest extends BaseTest {
// simple non-tandem deletion: CCCC*, -
refBytes = "TCCCCCCCCATG".getBytes();
vc = new VariantContextBuilder("foo", delLoc, 10, 14, Arrays.asList(ccccR,nullA)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],8);
Assert.assertEquals(result.getFirst().toArray()[1],4);
Assert.assertEquals(result.getSecond().length,1);
@ -540,7 +539,7 @@ public class VariantContextUnitTest extends BaseTest {
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],7);
Assert.assertEquals(result.getFirst().toArray()[1],5);
Assert.assertEquals(result.getFirst().toArray()[2],3);
@ -550,7 +549,7 @@ public class VariantContextUnitTest extends BaseTest {
// GAGA*,-,GAGAGAGA
refBytes = "TGAGAGAGAGATTT".getBytes();
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
Assert.assertEquals(result.getFirst().toArray()[0],5);
Assert.assertEquals(result.getFirst().toArray()[1],3);
Assert.assertEquals(result.getFirst().toArray()[2],7);

View File

@ -26,12 +26,8 @@
package org.broadinstitute.variant.variantcontext;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.utils.GeneralUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
@ -41,19 +37,17 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
public class VariantContextUtilsUnitTest extends BaseTest {
public class VariantContextUtilsUnitTest extends VariantBaseTest {
Allele Aref, T, C, G, Cref, ATC, ATCATC;
private GenomeLocParser genomeLocParser;
@BeforeSuite
public void setup() {
final File referenceFile = new File(b37KGReference);
try {
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
genomeLocParser = new GenomeLocParser(seq);
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile);
}
catch(FileNotFoundException ex) {
throw new UserException.CouldNotReadInputFile(referenceFile,ex);
throw new RuntimeException(referenceFile.getAbsolutePath(),ex);
}
// alleles
@ -658,7 +652,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
public void testRepeatDetectorTest(RepeatDetectorTest cfg) {
// test alleles are equal
Assert.assertEquals(GATKVariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
}
// --------------------------------------------------------------------------------
@ -704,7 +698,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
@Test(dataProvider = "ReverseClippingPositionTestProvider")
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
Assert.assertEquals(result, cfg.expectedClip);
}
@ -782,7 +776,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
@Test(dataProvider = "SplitBiallelics")
public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
final List<VariantContext> biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vc);
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc);
Assert.assertEquals(biallelics.size(), expectedBiallelics.size());
for ( int i = 0; i < biallelics.size(); i++ ) {
final VariantContext actual = biallelics.get(i);
@ -796,14 +790,14 @@ public class VariantContextUtilsUnitTest extends BaseTest {
final List<Genotype> genotypes = new ArrayList<Genotype>();
int sampleI = 0;
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
for ( final List<Allele> alleles : GeneralUtils.makePermutations(vc.getAlleles(), 2, true) ) {
genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
}
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make();
final List<VariantContext> biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
for ( int i = 0; i < biallelics.size(); i++ ) {
final VariantContext actual = biallelics.get(i);
Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples

View File

@ -25,13 +25,8 @@
package org.broadinstitute.variant.variantcontext;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.variant.VariantBaseTest;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@ -49,8 +44,7 @@ import java.util.Map;
*
* Test out parts of the VariantJEXLContext
*/
public class VariantJEXLContextUnitTest extends BaseTest {
public class VariantJEXLContextUnitTest extends VariantBaseTest {
private static String expression = "QUAL > 500.0";
private static VariantContextUtils.JexlVCMatchExp exp;
@ -60,27 +54,15 @@ public class VariantJEXLContextUnitTest extends BaseTest {
Allele ATC, ATCref;
// A [ref] / T at 10
GenomeLoc snpLoc;
// - / ATC [ref] from 20-23
private static int startingChr = 1;
private static int endingChr = 2;
private static int readCount = 100;
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
static SAMFileHeader header;
private static GenomeLocParser genomeLocParser;
@BeforeClass
public void beforeClass() {
header = ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
try {
exp = new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.createExpression(expression));
} catch (Exception e) {
Assert.fail("Unable to create expression" + e.getMessage());
}
snpLoc = genomeLocParser.createGenomeLoc("chr1", 10, 10, true);
}
@BeforeMethod
@ -142,9 +124,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
private JEXLMap getVarContext() {
List<Allele> alleles = Arrays.asList(Aref, T);
VariantContext vc = new VariantContextBuilder("test", snpLoc.getContig(), snpLoc.getStart(), snpLoc.getStop(), alleles).make();
VariantContext vc = new VariantContextBuilder("test", "chr1", 10, 10, alleles).make();
return new JEXLMap(Arrays.asList(exp),vc);
}
}

View File

@ -29,16 +29,11 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.Tribble;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.variant.VariantBaseTest;
import org.broadinstitute.variant.vcf.VCFCodec;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.variant.vcf.VCFHeaderVersion;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.variant.variantcontext.*;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@ -58,22 +53,20 @@ import java.util.*;
* <p/>
* This class tests out the ability of the VCF writer to correctly write VCF files
*/
public class VCFWriterUnitTest extends BaseTest {
public class VCFWriterUnitTest extends VariantBaseTest {
private Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
private Set<String> additionalColumns = new HashSet<String>();
private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
private GenomeLocParser genomeLocParser;
private IndexedFastaSequenceFile seq;
@BeforeClass
public void beforeTests() {
File referenceFile = new File(hg18Reference);
File referenceFile = new File(hg19Reference);
try {
seq = new CachingIndexedFastaSequenceFile(referenceFile);
genomeLocParser = new GenomeLocParser(seq);
seq = new IndexedFastaSequenceFile(referenceFile);
}
catch(FileNotFoundException ex) {
throw new UserException.CouldNotReadInputFile(referenceFile,ex);
throw new RuntimeException(referenceFile.getAbsolutePath(), ex);
}
}
@ -108,7 +101,7 @@ public class VCFWriterUnitTest extends BaseTest {
fakeVCFFile.delete();
}
catch (IOException e ) {
throw new ReviewedStingException(e.getMessage());
throw new RuntimeException(e.getMessage());
}
}
@ -133,8 +126,6 @@ public class VCFWriterUnitTest extends BaseTest {
* @return a VCFRecord
*/
private VariantContext createVC(VCFHeader header) {
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1",1);
List<Allele> alleles = new ArrayList<Allele>();
Set<String> filters = null;
Map<String, Object> attributes = new HashMap<String,Object>();
@ -148,7 +139,7 @@ public class VCFWriterUnitTest extends BaseTest {
Genotype gt = new GenotypeBuilder(name,alleles.subList(1,2)).GQ(0).attribute("BB", "1").phased(true).make();
genotypes.add(gt);
}
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
return new VariantContextBuilder("RANDOM", "chr1", 1, 1, alleles)
.genotypes(genotypes).attributes(attributes).make();
}

Some files were not shown because too many files have changed in this diff Show More