org.broadinstitute.variant refactor, part 2
-removed sting dependencies from test classes -removed org.apache.log4j dependency -misc cleanup
This commit is contained in:
parent
1599c9a20e
commit
f63f27aa13
|
|
@ -57,7 +57,7 @@ import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
|||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
|
|
|||
|
|
@ -53,11 +53,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -79,7 +79,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
|||
if ( !vc.isIndel())
|
||||
return null;
|
||||
|
||||
Pair<List<Integer>,byte[]> result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
|
||||
Pair<List<Integer>,byte[]> result = VariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
|
||||
if (result == null)
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
|
|||
|
|
@ -49,8 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
|
||||
|
||||
|
|
@ -172,7 +172,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
|
||||
*/
|
||||
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
|
||||
public int samplePloidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
|
||||
public int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false)
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -304,7 +304,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
|
||||
|
||||
// add the pool values for each genotype
|
||||
if (UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY) {
|
||||
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -135,7 +134,7 @@ public class UnifiedGenotyperEngine {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Requires({"toolkit != null", "UAC != null"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY);
|
||||
}
|
||||
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
|
||||
|
|
@ -526,7 +525,7 @@ public class UnifiedGenotyperEngine {
|
|||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
|
||||
vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
|
||||
if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
|
|
@ -663,7 +662,7 @@ public class UnifiedGenotyperEngine {
|
|||
private void determineGLModelsToUse() {
|
||||
|
||||
String modelPrefix = "";
|
||||
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY )
|
||||
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY )
|
||||
modelPrefix = GPSTRING;
|
||||
|
||||
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {
|
||||
|
|
|
|||
|
|
@ -47,7 +47,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -106,7 +105,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
return builder.make();
|
||||
} else {
|
||||
return vc;
|
||||
|
|
@ -352,6 +351,6 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes,
|
||||
final int ploidy) {
|
||||
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,10 +47,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
|
@ -92,7 +92,7 @@ abstract class ExactAFCalc extends AFCalc {
|
|||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
genotypeLikelihoods.add(gls);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,7 +48,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
@ -554,7 +553,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
}
|
||||
|
||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
|
||||
if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) {
|
||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
|
||||
}
|
||||
else {
|
||||
|
|
@ -566,7 +565,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
gb.PL(newLikelihoods);
|
||||
|
||||
// if we weren't asked to assign a genotype, then just no-call the sample
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
gb.alleles(NO_CALL_ALLELES);
|
||||
else
|
||||
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -57,7 +57,6 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
|
|
@ -204,7 +203,7 @@ public class GenotypingEngine {
|
|||
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
|
||||
|
||||
if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
annotatedCall = VariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
}
|
||||
|
||||
returnCalls.add( annotatedCall );
|
||||
|
|
|
|||
|
|
@ -72,7 +72,6 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
|||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
@ -298,7 +297,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
samplesList.addAll( samples );
|
||||
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
|
||||
|
|
@ -308,7 +307,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
simpleUAC.CONTAMINATION_FRACTION = 0.0;
|
||||
simpleUAC.exactCallsLog = null;
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// initialize the output VCF header
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -328,7 +328,7 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
if (vcfWriter != null) {
|
||||
Map<String, VCFHeader> header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName());
|
||||
samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), logger);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), true);
|
||||
headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
|
@ -116,7 +115,7 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
|
|||
|
||||
String trackName = variantCollection.variants.getName();
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import com.google.java.contract.Requires;
|
|||
import org.apache.commons.math.MathException;
|
||||
import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Collection;
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
|||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
|
|||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
|
|
@ -73,7 +73,7 @@ public class RepeatLengthCovariate implements ExperimentalCovariate {
|
|||
int maxRL = 0;
|
||||
for (int str = 1; str <= 8; str++) {
|
||||
if (i + str <= readBytes.length) {
|
||||
maxRL = Math.max(maxRL, GATKVariantContextUtils.findNumberofRepetitions(
|
||||
maxRL = Math.max(maxRL, VariantContextUtils.findNumberofRepetitions(
|
||||
Arrays.copyOfRange(readBytes, i, i + str),
|
||||
Arrays.copyOfRange(readBytes, i, readBytes.length)
|
||||
));
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.testng.Assert;
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
|
|||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
|
|||
|
|
@ -164,7 +164,7 @@ public class CombineVariantsUnitTest {
|
|||
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
|
||||
headers.add(one);
|
||||
headers.add(two);
|
||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
|
||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, false);
|
||||
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
|
||||
}
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ public class CombineVariantsUnitTest {
|
|||
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
|
||||
headers.add(one);
|
||||
headers.add(two);
|
||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
|
||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, false);
|
||||
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
|
||||
}
|
||||
|
||||
|
|
@ -186,7 +186,7 @@ public class CombineVariantsUnitTest {
|
|||
ArrayList<VCFHeader> headers = new ArrayList<VCFHeader>();
|
||||
headers.add(one);
|
||||
headers.add(two);
|
||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, null);
|
||||
Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(headers, false);
|
||||
Assert.assertEquals(lines.size(), VCFHeaderUnitTest.VCF4headerStringCount);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,20 +46,16 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import com.sun.org.apache.xpath.internal.operations.Gt;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantutils.ConcordanceMetrics;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeType;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
|
|
@ -67,7 +63,6 @@ import org.broadinstitute.variant.vcf.VCFHeader;
|
|||
import org.testng.annotations.Test;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
||||
|
|
@ -75,7 +70,6 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.io.StringBufferInputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
|||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.file.FSLockWithShared;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.samples.Sample;
|
|||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.recalibration.BQSRMode;
|
||||
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics;
|
||||
|
||||
|
||||
import ca.mcgill.mcb.pcingola.interval.Intron;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
|
@ -36,17 +35,13 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* print intervals file with all the variant sites that have "most" ( >= 90% by default) of the samples with "good" (>= 10 by default)coverage ("most" and "good" can be set in the command line).
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -32,9 +32,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.util.Collections;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.clipping.ClippingOp;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,6 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.VariantEvalUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -198,7 +197,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||
|
||||
@Argument(shortName="ploidy", fullName="samplePloidy", doc="Per-sample ploidy (number of chromosomes per sample)", required=false)
|
||||
protected int ploidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
|
||||
protected int ploidy = VariantContextUtils.DEFAULT_PLOIDY;
|
||||
|
||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||
private File ancestralAlignmentsFile = null;
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -51,7 +51,7 @@ public class TandemRepeat extends VariantStratifier {
|
|||
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
if ( eval == null || ! eval.isIndel() )
|
||||
return ALL;
|
||||
else if ( GATKVariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
|
||||
else if ( VariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
|
||||
print("REPEAT", eval, ref);
|
||||
return REPEAT;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -27,9 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -217,7 +217,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
if ( SET_KEY.toLowerCase().equals("null") )
|
||||
SET_KEY = null;
|
||||
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
||||
if ( SET_KEY != null )
|
||||
headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants"));
|
||||
if ( !ASSUME_IDENTICAL_SAMPLES )
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
|
|||
List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
|
||||
|
||||
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
||||
|
||||
headerLines.add(new VCFHeaderLine(VCFHeader.SOURCE_KEY, "SelectHeaders"));
|
||||
|
||||
|
|
|
|||
|
|
@ -400,7 +400,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
}
|
||||
// Initialize VCF header
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), logger);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
||||
headerLines.add(new VCFHeaderLine("source", "SelectVariants"));
|
||||
|
||||
if (KEEP_ORIGINAL_CHR_COUNTS) {
|
||||
|
|
|
|||
|
|
@ -36,9 +36,9 @@ import org.broadinstitute.sting.commandline.Input;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.writer.Options;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.apache.commons.math.MathException;
|
|||
import org.apache.commons.math.distribution.NormalDistribution;
|
||||
import org.apache.commons.math.distribution.NormalDistributionImpl;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
|
|||
|
|
@ -1,34 +1,34 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -29,8 +29,8 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import net.sf.samtools.CigarOperator;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.variant.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broad.tribble.readers.PositionalBufferedStream;
|
|||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
|
|
@ -147,32 +147,4 @@ public class GATKVCFUtils {
|
|||
return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary());
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all of the VCF records from source into memory, returning the header and the VariantContexts
|
||||
*
|
||||
* @param source the file to read, must be in VCF4 format
|
||||
* @return
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
|
||||
// read in the features
|
||||
final List<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
|
||||
|
||||
while ( ! pbs.isDone() ) {
|
||||
final VariantContext vc = codec.decode(pbs);
|
||||
if ( vc != null )
|
||||
vcs.add(vc);
|
||||
}
|
||||
|
||||
return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
|
||||
}
|
||||
}
|
||||
|
|
@ -25,22 +25,12 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class GATKVariantContextUtils {
|
||||
|
||||
public static final int DEFAULT_PLOIDY = 2;
|
||||
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
||||
/**
|
||||
* create a genome location, given a variant context
|
||||
* @param genomeLocParser parser
|
||||
|
|
@ -51,425 +41,4 @@ public class GATKVariantContextUtils {
|
|||
return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true iff VC is an non-complex indel where every allele represents an expansion or
|
||||
* contraction of a series of identical bases in the reference.
|
||||
*
|
||||
* For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT
|
||||
*
|
||||
* If VC = -/CT, then this function returns true because the CT insertion matches exactly the
|
||||
* upcoming reference.
|
||||
* If VC = -/CTA then this function returns false because the CTA isn't a perfect match
|
||||
*
|
||||
* Now consider deletions:
|
||||
*
|
||||
* If VC = CT/- then again the same logic applies and this returns true
|
||||
* The case of CTA/- makes no sense because it doesn't actually match the reference bases.
|
||||
*
|
||||
* The logic of this function is pretty simple. Take all of the non-null alleles in VC. For
|
||||
* each insertion allele of n bases, check if that allele matches the next n reference bases.
|
||||
* For each deletion allele of n bases, check if this matches the reference bases at n - 2 n,
|
||||
* as it must necessarily match the first n bases. If this test returns true for all
|
||||
* alleles you are a tandem repeat, otherwise you are not.
|
||||
*
|
||||
* @param vc
|
||||
* @param refBasesStartingAtVCWithPad not this is assumed to include the PADDED reference
|
||||
* @return
|
||||
*/
|
||||
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
|
||||
public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
|
||||
final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
|
||||
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
||||
return false;
|
||||
|
||||
final Allele ref = vc.getReference();
|
||||
|
||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||
if ( ! isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad) )
|
||||
return false;
|
||||
}
|
||||
|
||||
// we've passed all of the tests, so we are a repeat
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param vc
|
||||
* @param refBasesStartingAtVCWithPad
|
||||
* @return
|
||||
*/
|
||||
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
|
||||
public static Pair<List<Integer>,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
|
||||
final boolean VERBOSE = false;
|
||||
final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
|
||||
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
||||
return null;
|
||||
|
||||
final Allele refAllele = vc.getReference();
|
||||
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
|
||||
|
||||
byte[] repeatUnit = null;
|
||||
final ArrayList<Integer> lengths = new ArrayList<Integer>();
|
||||
|
||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
|
||||
|
||||
final int[] repetitionCount = result.first;
|
||||
// repetition count = 0 means allele is not a tandem expansion of context
|
||||
if (repetitionCount[0] == 0 || repetitionCount[1] == 0)
|
||||
return null;
|
||||
|
||||
if (lengths.size() == 0) {
|
||||
lengths.add(repetitionCount[0]); // add ref allele length only once
|
||||
}
|
||||
lengths.add(repetitionCount[1]); // add this alt allele's length
|
||||
|
||||
repeatUnit = result.second;
|
||||
if (VERBOSE) {
|
||||
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
|
||||
System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
|
||||
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
|
||||
System.out.println("RU:"+new String(repeatUnit));
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<List<Integer>, byte[]>(lengths,repeatUnit);
|
||||
}
|
||||
|
||||
protected static Pair<int[],byte[]> getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) {
|
||||
/* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units.
|
||||
Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)5.
|
||||
*/
|
||||
|
||||
byte[] longB;
|
||||
// find first repeat unit based on either ref or alt, whichever is longer
|
||||
if (altBases.length > refBases.length)
|
||||
longB = altBases;
|
||||
else
|
||||
longB = refBases;
|
||||
|
||||
// see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units
|
||||
// for example, -*,CACA needs to first be decomposed into (CA)2
|
||||
final int repeatUnitLength = findRepeatedSubstring(longB);
|
||||
final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength);
|
||||
|
||||
final int[] repetitionCount = new int[2];
|
||||
// repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext));
|
||||
// repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext));
|
||||
int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases);
|
||||
repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext))-repetitionsInRef;
|
||||
repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext))-repetitionsInRef;
|
||||
|
||||
return new Pair<int[], byte[]>(repetitionCount, repeatUnit);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Find out if a string can be represented as a tandem number of substrings.
|
||||
* For example ACTACT is a 2-tandem of ACT,
|
||||
* but ACTACA is not.
|
||||
*
|
||||
* @param bases String to be tested
|
||||
* @return Length of repeat unit, if string can be represented as tandem of substring (if it can't
|
||||
* be represented as one, it will be just the length of the input string)
|
||||
*/
|
||||
public static int findRepeatedSubstring(byte[] bases) {
|
||||
|
||||
int repLength;
|
||||
for (repLength=1; repLength <=bases.length; repLength++) {
|
||||
final byte[] candidateRepeatUnit = Arrays.copyOf(bases,repLength);
|
||||
boolean allBasesMatch = true;
|
||||
for (int start = repLength; start < bases.length; start += repLength ) {
|
||||
// check that remaining of string is exactly equal to repeat unit
|
||||
final byte[] basePiece = Arrays.copyOfRange(bases,start,start+candidateRepeatUnit.length);
|
||||
if (!Arrays.equals(candidateRepeatUnit, basePiece)) {
|
||||
allBasesMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (allBasesMatch)
|
||||
return repLength;
|
||||
}
|
||||
|
||||
return repLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper routine that finds number of repetitions a string consists of.
|
||||
* For example, for string ATAT and repeat unit AT, number of repetitions = 2
|
||||
* @param repeatUnit Substring
|
||||
* @param testString String to test
|
||||
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
|
||||
*/
|
||||
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
|
||||
int numRepeats = 0;
|
||||
for (int start = 0; start < testString.length; start += repeatUnit.length) {
|
||||
int end = start + repeatUnit.length;
|
||||
byte[] unit = Arrays.copyOfRange(testString,start, end);
|
||||
if(Arrays.equals(unit,repeatUnit))
|
||||
numRepeats++;
|
||||
else
|
||||
return numRepeats;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function for isTandemRepeat that checks that allele matches somewhere on the reference
|
||||
* @param ref
|
||||
* @param alt
|
||||
* @param refBasesStartingAtVCWithoutPad
|
||||
* @return
|
||||
*/
|
||||
protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) {
|
||||
if ( ! Allele.oneIsPrefixOfOther(ref, alt) )
|
||||
return false; // we require one allele be a prefix of another
|
||||
|
||||
if ( ref.length() > alt.length() ) { // we are a deletion
|
||||
return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2);
|
||||
} else { // we are an insertion
|
||||
return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1);
|
||||
}
|
||||
}
|
||||
|
||||
protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) {
|
||||
final String potentialRepeat = l.substring(s.length()); // skip s bases
|
||||
|
||||
for ( int i = 0; i < minNumberOfMatches; i++) {
|
||||
final int start = i * potentialRepeat.length();
|
||||
final int end = (i+1) * potentialRepeat.length();
|
||||
if ( ref.length() < end )
|
||||
return false; // we ran out of bases to test
|
||||
final String refSub = ref.substring(start, end);
|
||||
if ( ! refSub.equals(potentialRepeat) )
|
||||
return false; // repeat didn't match, fail
|
||||
}
|
||||
|
||||
return true; // we passed all tests, we matched
|
||||
}
|
||||
|
||||
/**
 * Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
 *
 * Convenience wrapper that keeps every allele of the context and asks for genotypes to be
 * (re)assigned from the likelihoods.
 *
 * @param vc variant context with genotype likelihoods
 * @return genotypes context
 */
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
    return subsetDiploidAlleles(vc, vc.getAlleles(), true);
}
|
||||
|
||||
/**
|
||||
* Split variant context into its biallelic components if there are more than 2 alleles
|
||||
*
|
||||
* For VC has A/B/C alleles, returns A/B and A/C contexts.
|
||||
* Genotypes are all no-calls now (it's not possible to fix them easily)
|
||||
* Alleles are right trimmed to satisfy VCF conventions
|
||||
*
|
||||
* If vc is biallelic or non-variant it is just returned
|
||||
*
|
||||
* Chromosome counts are updated (but they are by definition 0)
|
||||
*
|
||||
* @param vc a potentially multi-allelic variant context
|
||||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
// non variant or biallelics already satisfy the contract
|
||||
return Collections.singletonList(vc);
|
||||
else {
|
||||
final List<VariantContext> biallelics = new LinkedList<VariantContext>();
|
||||
|
||||
for ( final Allele alt : vc.getAlternateAlleles() ) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(subsetDiploidAlleles(vc, alleles, false));
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
biallelics.add(reverseTrimAlleles(builder.make()));
|
||||
}
|
||||
|
||||
return biallelics;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
 *
 * @param vc variant context with genotype likelihoods
 * @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
 * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
 * @return genotypes
 */
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
                                                    final List<Allele> allelesToUse,
                                                    final boolean assignGenotypes) {

    // the genotypes with PLs
    final GenotypesContext oldGTs = vc.getGenotypes();

    // samples
    final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();

    // the new genotypes to create
    final GenotypesContext newGTs = GenotypesContext.create();

    // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
    final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
    final int numNewAltAlleles = allelesToUse.size() - 1;

    // which PLs should be carried forward?
    ArrayList<Integer> likelihoodIndexesToUse = null;

    // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
    // then we can keep the PLs as is; otherwise, we determine which ones to keep
    if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
        likelihoodIndexesToUse = new ArrayList<Integer>(30);

        // mark which original alt alleles survive the subsetting
        final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
        for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
            if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
                altAlleleIndexToUse[i] = true;
        }

        // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
        final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
        for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
            final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
            // consider this entry only if both of the alleles are good
            // (allele index 0 is the reference allele, which is always kept)
            if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
                likelihoodIndexesToUse.add(PLindex);
        }
    }

    // create the new genotypes
    for ( int k = 0; k < oldGTs.size(); k++ ) {
        final Genotype g = oldGTs.get(sampleIndices.get(k));
        // samples without likelihoods cannot be subsetted; emit a no-call
        if ( !g.hasLikelihoods() ) {
            newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
            continue;
        }

        // create the new likelihoods array from the alleles we are allowed to use
        final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
        double[] newLikelihoods;
        if ( likelihoodIndexesToUse == null ) {
            // all PLs are carried forward unchanged
            newLikelihoods = originalLikelihoods;
        } else {
            newLikelihoods = new double[likelihoodIndexesToUse.size()];
            int newIndex = 0;
            for ( int oldIndex : likelihoodIndexesToUse )
                newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];

            // might need to re-normalize
            newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
        }

        // if there is no mass on the (new) likelihoods, then just no-call the sample
        // (a sum above SUM_GL_THRESH_NOCALL marks the GLs as non-informative)
        if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
            newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
        }
        else {
            final GenotypeBuilder gb = new GenotypeBuilder(g);

            if ( numNewAltAlleles == 0 )
                gb.noPL();
            else
                gb.PL(newLikelihoods);

            // if we weren't asked to assign a genotype, then just no-call the sample
            // NOTE(review): the second clause repeats the enclosing test and is always
            // false on this branch; kept as-is to preserve behavior
            if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
                gb.alleles(NO_CALL_ALLELES);
            }
            else {
                // find the genotype with maximum likelihoods
                int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
                GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);

                // translate the winning PL index back to a concrete allele pair
                gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
                if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
            }
            newGTs.add(gb.make());
        }
    }

    return newGTs;
}
|
||||
|
||||
/**
 * Trims the common suffix (reverse clipping) shared by all alleles of a variant context.
 *
 * Symbolic alleles are passed through untouched. If no trimming is possible, or the
 * context has at most one allele, the input context is returned unchanged; otherwise a
 * new context is built with trimmed alleles, remapped genotypes, and an adjusted stop
 * position.
 *
 * @param inputVC the variant context to trim
 * @return the trimmed variant context, or inputVC itself when nothing needs trimming
 */
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {

    // see whether we need to trim common reference base from all alleles
    final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
    if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
        return inputVC;

    final List<Allele> alleles = new ArrayList<Allele>();
    final GenotypesContext genotypes = GenotypesContext.create();
    // maps each original allele to its trimmed replacement so genotypes can be rewritten below
    final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

    for (final Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
            // symbolic alleles have no concrete bases to trim
            alleles.add(a);
            originalToTrimmedAlleleMap.put(a, a);
        } else {
            // get bases for current allele and create a new one with trimmed bases
            final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
            final Allele trimmedAllele = Allele.create(newBases, a.isReference());
            alleles.add(trimmedAllele);
            originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
    }

    // now we can recreate new genotypes with trimmed alleles
    for ( final Genotype genotype : inputVC.getGenotypes() ) {
        final List<Allele> originalAlleles = genotype.getAlleles();
        final List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for ( final Allele a : originalAlleles ) {
            if ( a.isCalled() )
                trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
            else
                // uncalled alleles stay as no-calls
                trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
    }

    // the stop coordinate shrinks along with the trimmed reference allele (alleles.get(0))
    return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
}
|
||||
|
||||
/**
 * Computes how many bases can be clipped from the end of every allele (a suffix shared
 * with the reference) without invalidating the variant context.
 *
 * Symbolic alleles are ignored. Clipping stops as soon as any allele's trailing base
 * disagrees with the reference suffix, or clipping would run into the forward-clipped
 * region, or an allele would be clipped down to nothing.
 *
 * @param unclippedAlleles the alleles to examine
 * @param ref the reference bases
 * @param forwardClipping number of bases already clipped from the front
 * @param allowFullClip if true an allele may be clipped away entirely; if false the
 *                      result is reduced so at least one base remains
 * @return the number of bases to clip from the end, or -1 when the reference is
 *         exhausted and full clipping is not allowed
 */
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
                                         final byte[] ref,
                                         final int forwardClipping,
                                         final boolean allowFullClip) {
    int clipping = 0;
    boolean stillClipping = true;

    while ( stillClipping ) {
        for ( final Allele a : unclippedAlleles ) {
            if ( a.isSymbolic() )
                continue;

            // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
            // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
            if ( a.length() - clipping == 0 )
                return clipping - (allowFullClip ? 0 : 1);

            // stop when clipping would run into the forward-clipped region of this allele
            if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
                stillClipping = false;
            }
            else if ( ref.length == clipping ) {
                // ran out of reference bases to compare against
                if ( allowFullClip )
                    stillClipping = false;
                else
                    return -1;
            }
            else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
                // this allele's trailing base no longer matches the reference suffix
                stillClipping = false;
            }
        }
        // only advance when every allele agreed on this position
        if ( stillClipping )
            clipping++;
    }

    return clipping;
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,13 +27,13 @@ package org.broadinstitute.variant.bcf2;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
|
|
@ -50,8 +50,6 @@ import java.util.Map;
|
|||
* Decode BCF2 files
|
||||
*/
|
||||
public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
||||
|
||||
private final static int ALLOWED_MAJOR_VERSION = 2;
|
||||
private final static int MIN_MINOR_VERSION = 1;
|
||||
|
||||
|
|
@ -149,7 +147,9 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
|
||||
error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
|
||||
|
||||
logger.debug("Parsing data stream with BCF version " + bcfVersion);
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
|
||||
System.err.println("Parsing data stream with BCF version " + bcfVersion);
|
||||
}
|
||||
|
||||
final int headerSizeInBytes = BCF2Type.INT32.read(inputStream);
|
||||
|
||||
|
|
|
|||
|
|
@ -27,9 +27,8 @@ package org.broadinstitute.variant.bcf2;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
|
|
@ -38,8 +37,6 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
|
||||
public final class BCF2Decoder {
|
||||
final protected static Logger logger = Logger.getLogger(FeatureCodec.class);
|
||||
|
||||
byte[] recordBytes = null;
|
||||
ByteArrayInputStream recordStream = null;
|
||||
|
||||
|
|
@ -343,8 +340,9 @@ public final class BCF2Decoder {
|
|||
bytesRead += read1;
|
||||
}
|
||||
|
||||
if ( nReadAttempts > 1 ) // TODO -- remove me
|
||||
logger.warn("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior");
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED && nReadAttempts > 1 ) { // TODO -- remove me
|
||||
System.err.println("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior");
|
||||
}
|
||||
|
||||
validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
|
||||
} catch ( IOException e ) {
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.variant.bcf2;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
|
@ -46,7 +45,6 @@ import java.util.*;
|
|||
* @since 6/12
|
||||
*/
|
||||
public class BCF2GenotypeFieldDecoders {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2GenotypeFieldDecoders.class);
|
||||
private final static boolean ENABLE_FASTPATH_GT = true;
|
||||
private final static int MIN_SAMPLES_FOR_FASTPATH_GENOTYPES = 0; // TODO -- update to reasonable number
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.variant.bcf2;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
|
|
@ -40,8 +39,6 @@ import java.util.*;
|
|||
* @since 5/12
|
||||
*/
|
||||
public class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2LazyGenotypesDecoder.class);
|
||||
|
||||
// the essential information for us to use to decode the genotypes data
|
||||
// initialized when this lazy decoder is created, as we know all of this from the BCF2Codec
|
||||
// and its stored here again for code cleanliness
|
||||
|
|
@ -63,8 +60,6 @@ public class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser
|
|||
|
||||
@Override
|
||||
public LazyGenotypesContext.LazyData parse(final Object data) {
|
||||
// if ( logger.isDebugEnabled() )
|
||||
// logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
||||
try {
|
||||
|
||||
// load our byte[] data into the decoder
|
||||
|
|
|
|||
|
|
@ -25,10 +25,18 @@
|
|||
|
||||
package org.broadinstitute.variant.utils;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.*;
|
||||
|
||||
public class Utils {
|
||||
/**
|
||||
* Constants and utility methods used throughout the VCF/BCF/VariantContext classes
|
||||
*/
|
||||
public class GeneralUtils {
|
||||
|
||||
/**
|
||||
* Setting this to true causes the VCF/BCF/VariantContext classes to emit debugging information
|
||||
* to standard error
|
||||
*/
|
||||
public static final boolean DEBUG_MODE_ENABLED = false;
|
||||
|
||||
/**
|
||||
* The smallest log10 value we'll emit from normalizeFromLog10 and other functions
|
||||
|
|
@ -66,7 +74,6 @@ public class Utils {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* normalizes the log10-based array. ASSUMES THAT ALL ARRAY ENTRIES ARE <= 0 (<= 1 IN REAL-SPACE).
|
||||
*
|
||||
|
|
@ -134,10 +141,21 @@ public class Utils {
|
|||
return normalized;
|
||||
}
|
||||
|
||||
public static double sum(double[] values) {
|
||||
double s = 0.0;
|
||||
for (double v : values)
|
||||
s += v;
|
||||
return s;
|
||||
}
|
||||
|
||||
public static double arrayMax(final double[] array) {
|
||||
return array[maxElementIndex(array, array.length)];
|
||||
}
|
||||
|
||||
public static int maxElementIndex(final double[] array) {
|
||||
return maxElementIndex(array, array.length);
|
||||
}
|
||||
|
||||
public static int maxElementIndex(final double[] array, final int endIndex) {
|
||||
if (array == null || array.length == 0)
|
||||
throw new IllegalArgumentException("Array cannot be null!");
|
||||
|
|
@ -150,6 +168,82 @@ public class Utils {
|
|||
|
||||
return maxI;
|
||||
}
|
||||
|
||||
public static <T> List<T> cons(final T elt, final List<T> l) {
|
||||
List<T> l2 = new ArrayList<T>();
|
||||
l2.add(elt);
|
||||
if (l != null) l2.addAll(l);
|
||||
return l2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make all combinations of N size of objects
|
||||
*
|
||||
* if objects = [A, B, C]
|
||||
* if N = 1 => [[A], [B], [C]]
|
||||
* if N = 2 => [[A, A], [B, A], [C, A], [A, B], [B, B], [C, B], [A, C], [B, C], [C, C]]
|
||||
*
|
||||
* @param objects
|
||||
* @param n
|
||||
* @param <T>
|
||||
* @param withReplacement if false, the resulting permutations will only contain unique objects from objects
|
||||
* @return
|
||||
*/
|
||||
public static <T> List<List<T>> makePermutations(final List<T> objects, final int n, final boolean withReplacement) {
|
||||
final List<List<T>> combinations = new ArrayList<List<T>>();
|
||||
|
||||
if ( n <= 0 )
|
||||
;
|
||||
else if ( n == 1 ) {
|
||||
for ( final T o : objects )
|
||||
combinations.add(Collections.singletonList(o));
|
||||
} else {
|
||||
final List<List<T>> sub = makePermutations(objects, n - 1, withReplacement);
|
||||
for ( List<T> subI : sub ) {
|
||||
for ( final T a : objects ) {
|
||||
if ( withReplacement || ! subI.contains(a) )
|
||||
combinations.add(cons(a, subI));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return combinations;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares double values for equality (within 1e-6), or inequality.
|
||||
*
|
||||
* @param a the first double value
|
||||
* @param b the second double value
|
||||
* @return -1 if a is greater than b, 0 if a is equal to be within 1e-6, 1 if b is greater than a.
|
||||
*/
|
||||
public static byte compareDoubles(double a, double b) {
|
||||
return compareDoubles(a, b, 1e-6);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares double values for equality (within epsilon), or inequality.
|
||||
*
|
||||
* @param a the first double value
|
||||
* @param b the second double value
|
||||
* @param epsilon the precision within which two double values will be considered equal
|
||||
* @return -1 if a is greater than b, 0 if a is equal to be within epsilon, 1 if b is greater than a.
|
||||
*/
|
||||
public static byte compareDoubles(double a, double b, double epsilon) {
|
||||
if (Math.abs(a - b) < epsilon) {
|
||||
return 0;
|
||||
}
|
||||
if (a > b) {
|
||||
return -1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static public final <T> List<T> reverse(final List<T> l) {
|
||||
final List<T> newL = new ArrayList<T>(l);
|
||||
Collections.reverse(newL);
|
||||
return newL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -23,7 +23,7 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.collections;
|
||||
package org.broadinstitute.variant.utils;
|
||||
|
||||
|
||||
public class Pair<X,Y> {
|
||||
|
|
@ -28,7 +28,7 @@ package org.broadinstitute.variant.variantcontext;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.variant.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -155,7 +155,7 @@ public class GenotypeLikelihoods {
|
|||
//Returns null in case of missing likelihoods
|
||||
public EnumMap<GenotypeType,Double> getAsMap(boolean normalizeFromLog10){
|
||||
//Make sure that the log10likelihoods are set
|
||||
double[] likelihoods = normalizeFromLog10 ? Utils.normalizeFromLog10(getAsVector()) : getAsVector();
|
||||
double[] likelihoods = normalizeFromLog10 ? GeneralUtils.normalizeFromLog10(getAsVector()) : getAsVector();
|
||||
if(likelihoods == null)
|
||||
return null;
|
||||
EnumMap<GenotypeType,Double> likelihoodsMap = new EnumMap<GenotypeType, Double>(GenotypeType.class);
|
||||
|
|
@ -215,7 +215,7 @@ public class GenotypeLikelihoods {
|
|||
if (qual < 0) {
|
||||
// QUAL can be negative if the chosen genotype is not the most likely one individually.
|
||||
// In this case, we compute the actual genotype probability and QUAL is the likelihood of it not being the chosen one
|
||||
double[] normalized = Utils.normalizeFromLog10(likelihoods);
|
||||
double[] normalized = GeneralUtils.normalizeFromLog10(likelihoods);
|
||||
double chosenGenotype = normalized[iOfChoosenGenotype];
|
||||
return Math.log10(1.0 - chosenGenotype);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -25,10 +25,10 @@
|
|||
|
||||
package org.broadinstitute.variant.variantcontext;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -202,7 +202,6 @@ import java.util.*;
|
|||
public class VariantContext implements Feature { // to enable tribble integration
|
||||
private final static boolean WARN_ABOUT_BAD_END = true;
|
||||
private final static int MAX_ALLELE_SIZE_FOR_NON_SV = 150;
|
||||
final protected static Logger logger = Logger.getLogger(VariantContext.class);
|
||||
private boolean fullyDecoded = false;
|
||||
protected CommonInfo commonInfo = null;
|
||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||
|
|
@ -1176,10 +1175,12 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
final String message = "Badly formed variant context at location " + getChr() + ":"
|
||||
+ getStart() + "; getEnd() was " + getEnd()
|
||||
+ " but this VariantContext contains an END key with value " + end;
|
||||
if ( WARN_ABOUT_BAD_END )
|
||||
logger.warn(message);
|
||||
else
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED && WARN_ABOUT_BAD_END ) {
|
||||
System.err.println(message);
|
||||
}
|
||||
else {
|
||||
throw new TribbleException(message);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
final long length = (stop - start) + 1;
|
||||
|
|
|
|||
|
|
@ -29,24 +29,27 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.jexl2.Expression;
|
||||
import org.apache.commons.jexl2.JexlEngine;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
public class VariantContextUtils {
|
||||
private static Logger logger = Logger.getLogger(VariantContextUtils.class);
|
||||
public final static String MERGE_INTERSECTION = "Intersection";
|
||||
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
||||
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||
public static final int DEFAULT_PLOIDY = 2;
|
||||
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
|
||||
private static Set<String> MISSING_KEYS_WARNED_ABOUT = new HashSet<String>();
|
||||
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
|
||||
final public static JexlEngine engine = new JexlEngine();
|
||||
private final static boolean ASSUME_MISSING_FIELDS_ARE_STRINGS = false;
|
||||
|
|
@ -166,7 +169,8 @@ public class VariantContextUtils {
|
|||
if ( ASSUME_MISSING_FIELDS_ARE_STRINGS ) {
|
||||
if ( ! MISSING_KEYS_WARNED_ABOUT.contains(field) ) {
|
||||
MISSING_KEYS_WARNED_ABOUT.add(field);
|
||||
logger.warn("Field " + field + " missing from VCF header, assuming it is an unbounded string type");
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED )
|
||||
System.err.println("Field " + field + " missing from VCF header, assuming it is an unbounded string type");
|
||||
}
|
||||
return new VCFInfoHeaderLine(field, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Auto-generated string header for " + field);
|
||||
}
|
||||
|
|
@ -176,6 +180,428 @@ public class VariantContextUtils {
|
|||
return metaData;
|
||||
}
|
||||
|
||||
/**
 * Returns true iff VC is an non-complex indel where every allele represents an expansion or
 * contraction of a series of identical bases in the reference.
 *
 * For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CT
 *
 * If VC = -/CT, then this function returns true because the CT insertion matches exactly the
 * upcoming reference.
 * If VC = -/CTA then this function returns false because the CTA isn't a perfect match
 *
 * Now consider deletions:
 *
 * If VC = CT/- then again the same logic applies and this returns true
 * The case of CTA/- makes no sense because it doesn't actually match the reference bases.
 *
 * The logic of this function is pretty simple. Take all of the non-null alleles in VC. For
 * each insertion allele of n bases, check if that allele matches the next n reference bases.
 * For each deletion allele of n bases, check if this matches the reference bases at n - 2 n,
 * as it must necessarily match the first n bases. If this test returns true for all
 * alleles you are a tandem repeat, otherwise you are not.
 *
 * @param vc the variant context to test
 * @param refBasesStartingAtVCWithPad note: this is assumed to include the PADDED reference
 * @return true iff vc is a non-complex indel whose alleles are all tandem expansions/contractions of the reference
 */
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
    // drop the padding base so comparisons start at the first inserted/deleted position
    final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
    if ( ! vc.isIndel() ) // only indels are tandem repeats
        return false;

    final Allele ref = vc.getReference();

    // every alternate allele must look like a repeat of the downstream reference
    for ( final Allele allele : vc.getAlternateAlleles() ) {
        if ( ! isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad) )
            return false;
    }

    // we've passed all of the tests, so we are a repeat
    return true;
}
|
||||
|
||||
/**
 * Computes the tandem repeat unit of an indel and the number of repeat units spanned by
 * the ref allele and each alt allele (including the downstream reference context).
 *
 * @param vc the variant context; must be an indel, otherwise null is returned
 * @param refBasesStartingAtVCWithPad reference bases starting at the VC, including the padding base
 * @return a pair of (counts, repeat unit) where the first count is for the ref allele and
 *         subsequent counts are one per alt allele; null if vc is not an indel or any
 *         allele is not a tandem expansion/contraction of the reference context
 */
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
public static Pair<List<Integer>,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
    final boolean VERBOSE = false;  // debugging output toggle
    // drop the padding base from the reference context
    final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
    if ( ! vc.isIndel() ) // only indels are tandem repeats
        return null;

    final Allele refAllele = vc.getReference();
    // drop the padding base from the ref allele as well
    final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());

    byte[] repeatUnit = null;
    final ArrayList<Integer> lengths = new ArrayList<Integer>();

    for ( final Allele allele : vc.getAlternateAlleles() ) {
        Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());

        final int[] repetitionCount = result.first;
        // repetition count = 0 means allele is not a tandem expansion of context
        if (repetitionCount[0] == 0 || repetitionCount[1] == 0)
            return null;

        if (lengths.size() == 0) {
            lengths.add(repetitionCount[0]); // add ref allele length only once
        }
        lengths.add(repetitionCount[1]); // add this alt allele's length

        // NOTE(review): repeatUnit is overwritten on every iteration, so for multi-allelic
        // sites the unit derived from the last alt allele wins -- confirm this is intended
        repeatUnit = result.second;
        if (VERBOSE) {
            System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
            System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
            System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
            System.out.println("RU:"+new String(repeatUnit));
        }
    }

    return new Pair<List<Integer>, byte[]>(lengths,repeatUnit);
}
|
||||
|
||||
/**
 * Core repeat-counting helper: derives the repeat unit from ref/alt (whichever is longer)
 * and counts how many copies of that unit each allele plus the downstream reference
 * context contains, relative to the copies already present in the ref allele alone.
 *
 * @param refBases ref allele bases (padding base removed)
 * @param altBases alt allele bases (padding base removed)
 * @param remainingRefContext reference bases downstream of the variant
 * @return a pair of ({ref count, alt count}, repeat unit); a count of 0 indicates the
 *         corresponding allele is not a tandem expansion of the context
 */
protected static Pair<int[],byte[]> getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) {
    /* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units.
       Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)5.
     */

    byte[] longB;
    // find first repeat unit based on either ref or alt, whichever is longer
    if (altBases.length > refBases.length)
        longB = altBases;
    else
        longB = refBases;

    // see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units
    // for example, -*,CACA needs to first be decomposed into (CA)2
    final int repeatUnitLength = findRepeatedSubstring(longB);
    final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength);

    final int[] repetitionCount = new int[2];
    // count the repeats already present in the ref allele itself; subtracting it below makes
    // the reported counts relative to the ref allele rather than absolute
    int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases);
    repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext))-repetitionsInRef;
    repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext))-repetitionsInRef;

    return new Pair<int[], byte[]>(repetitionCount, repeatUnit);

}
|
||||
|
||||
/**
|
||||
* Find out if a string can be represented as a tandem number of substrings.
|
||||
* For example ACTACT is a 2-tandem of ACT,
|
||||
* but ACTACA is not.
|
||||
*
|
||||
* @param bases String to be tested
|
||||
* @return Length of repeat unit, if string can be represented as tandem of substring (if it can't
|
||||
* be represented as one, it will be just the length of the input string)
|
||||
*/
|
||||
public static int findRepeatedSubstring(byte[] bases) {
|
||||
|
||||
int repLength;
|
||||
for (repLength=1; repLength <=bases.length; repLength++) {
|
||||
final byte[] candidateRepeatUnit = Arrays.copyOf(bases,repLength);
|
||||
boolean allBasesMatch = true;
|
||||
for (int start = repLength; start < bases.length; start += repLength ) {
|
||||
// check that remaining of string is exactly equal to repeat unit
|
||||
final byte[] basePiece = Arrays.copyOfRange(bases,start,start+candidateRepeatUnit.length);
|
||||
if (!Arrays.equals(candidateRepeatUnit, basePiece)) {
|
||||
allBasesMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (allBasesMatch)
|
||||
return repLength;
|
||||
}
|
||||
|
||||
return repLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper routine that finds number of repetitions a string consists of.
|
||||
* For example, for string ATAT and repeat unit AT, number of repetitions = 2
|
||||
* @param repeatUnit Substring
|
||||
* @param testString String to test
|
||||
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
|
||||
*/
|
||||
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
|
||||
int numRepeats = 0;
|
||||
for (int start = 0; start < testString.length; start += repeatUnit.length) {
|
||||
int end = start + repeatUnit.length;
|
||||
byte[] unit = Arrays.copyOfRange(testString,start, end);
|
||||
if(Arrays.equals(unit,repeatUnit))
|
||||
numRepeats++;
|
||||
else
|
||||
return numRepeats;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function for isTandemRepeat that checks that allele matches somewhere on the reference
|
||||
* @param ref
|
||||
* @param alt
|
||||
* @param refBasesStartingAtVCWithoutPad
|
||||
* @return
|
||||
*/
|
||||
protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) {
|
||||
if ( ! Allele.oneIsPrefixOfOther(ref, alt) )
|
||||
return false; // we require one allele be a prefix of another
|
||||
|
||||
if ( ref.length() > alt.length() ) { // we are a deletion
|
||||
return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2);
|
||||
} else { // we are an insertion
|
||||
return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1);
|
||||
}
|
||||
}
|
||||
|
||||
protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) {
|
||||
final String potentialRepeat = l.substring(s.length()); // skip s bases
|
||||
|
||||
for ( int i = 0; i < minNumberOfMatches; i++) {
|
||||
final int start = i * potentialRepeat.length();
|
||||
final int end = (i+1) * potentialRepeat.length();
|
||||
if ( ref.length() < end )
|
||||
return false; // we ran out of bases to test
|
||||
final String refSub = ref.substring(start, end);
|
||||
if ( ! refSub.equals(potentialRepeat) )
|
||||
return false; // repeat didn't match, fail
|
||||
}
|
||||
|
||||
return true; // we passed all tests, we matched
|
||||
}
|
||||
|
||||
/**
|
||||
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
|
||||
*
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @return genotypes context
|
||||
*/
|
||||
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
|
||||
return subsetDiploidAlleles(vc, vc.getAlleles(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split variant context into its biallelic components if there are more than 2 alleles
|
||||
*
|
||||
* For VC has A/B/C alleles, returns A/B and A/C contexts.
|
||||
* Genotypes are all no-calls now (it's not possible to fix them easily)
|
||||
* Alleles are right trimmed to satisfy VCF conventions
|
||||
*
|
||||
* If vc is biallelic or non-variant it is just returned
|
||||
*
|
||||
* Chromosome counts are updated (but they are by definition 0)
|
||||
*
|
||||
* @param vc a potentially multi-allelic variant context
|
||||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
// non variant or biallelics already satisfy the contract
|
||||
return Collections.singletonList(vc);
|
||||
else {
|
||||
final List<VariantContext> biallelics = new LinkedList<VariantContext>();
|
||||
|
||||
for ( final Allele alt : vc.getAlternateAlleles() ) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(subsetDiploidAlleles(vc, alleles, false));
|
||||
calculateChromosomeCounts(builder, true);
|
||||
biallelics.add(reverseTrimAlleles(builder.make()));
|
||||
}
|
||||
|
||||
return biallelics;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
 *
 * @param vc variant context with genotype likelihoods
 * @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
 * @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
 * @return genotypes
 */
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
                                                    final List<Allele> allelesToUse,
                                                    final boolean assignGenotypes) {

    // the genotypes with PLs
    final GenotypesContext oldGTs = vc.getGenotypes();

    // samples
    final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();

    // the new genotypes to create
    final GenotypesContext newGTs = GenotypesContext.create();

    // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
    final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
    final int numNewAltAlleles = allelesToUse.size() - 1;

    // which PLs should be carried forward?
    ArrayList<Integer> likelihoodIndexesToUse = null;

    // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
    // then we can keep the PLs as is; otherwise, we determine which ones to keep
    if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
        likelihoodIndexesToUse = new ArrayList<Integer>(30);

        // mark which of the original alt alleles survive the subsetting
        final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
        for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
            if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
                altAlleleIndexToUse[i] = true;
        }

        // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
        final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
        for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
            final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
            // consider this entry only if both of the alleles are good
            if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
                likelihoodIndexesToUse.add(PLindex);
        }
    }

    // create the new genotypes
    for ( int k = 0; k < oldGTs.size(); k++ ) {
        final Genotype g = oldGTs.get(sampleIndices.get(k));
        // samples with no likelihoods can only be no-called
        if ( !g.hasLikelihoods() ) {
            newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
            continue;
        }

        // create the new likelihoods array from the alleles we are allowed to use
        final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
        double[] newLikelihoods;
        // a null likelihoodIndexesToUse means all PLs are kept as-is (the optimization above)
        if ( likelihoodIndexesToUse == null ) {
            newLikelihoods = originalLikelihoods;
        } else {
            newLikelihoods = new double[likelihoodIndexesToUse.size()];
            int newIndex = 0;
            for ( int oldIndex : likelihoodIndexesToUse )
                newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];

            // might need to re-normalize
            newLikelihoods = GeneralUtils.normalizeFromLog10(newLikelihoods, false, true);
        }

        // if there is no mass on the (new) likelihoods, then just no-call the sample
        if ( GeneralUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
            newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
        }
        else {
            final GenotypeBuilder gb = new GenotypeBuilder(g);

            // with no alt alleles remaining there are no meaningful PLs to carry
            if ( numNewAltAlleles == 0 )
                gb.noPL();
            else
                gb.PL(newLikelihoods);

            // if we weren't asked to assign a genotype, then just no-call the sample
            if ( !assignGenotypes || GeneralUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
                gb.alleles(NO_CALL_ALLELES);
            }
            else {
                // find the genotype with maximum likelihoods
                int PLindex = numNewAltAlleles == 0 ? 0 : GeneralUtils.maxElementIndex(newLikelihoods);
                GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);

                // translate the best PL entry's allele-pair indices back into actual alleles
                gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
                if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
            }
            newGTs.add(gb.make());
        }
    }

    return newGTs;
}
|
||||
|
||||
/**
 * Trims any common reference suffix shared by all alleles of the context, producing a new
 * variant context with right-trimmed alleles, remapped genotypes, and an adjusted stop
 * position. Returns the input unchanged if nothing can be trimmed.
 *
 * @param inputVC the variant context to trim
 * @return a trimmed variant context, or inputVC itself if no trimming applies
 */
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {

    // see whether we need to trim common reference base from all alleles
    final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
    if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
        return inputVC;

    final List<Allele> alleles = new ArrayList<Allele>();
    final GenotypesContext genotypes = GenotypesContext.create();
    // map each original allele to its trimmed counterpart so genotypes can be rewritten below
    final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

    for (final Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
            // symbolic alleles have no literal bases to trim; keep them unchanged
            alleles.add(a);
            originalToTrimmedAlleleMap.put(a, a);
        } else {
            // get bases for current allele and create a new one with trimmed bases
            final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
            final Allele trimmedAllele = Allele.create(newBases, a.isReference());
            alleles.add(trimmedAllele);
            originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
    }

    // now we can recreate new genotypes with trimmed alleles
    for ( final Genotype genotype : inputVC.getGenotypes() ) {
        final List<Allele> originalAlleles = genotype.getAlleles();
        final List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for ( final Allele a : originalAlleles ) {
            // uncalled alleles cannot be looked up in the map; keep them as NO_CALL
            if ( a.isCalled() )
                trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
            else
                trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
    }

    // the stop position shrinks with the trimming; alleles.get(0) is the trimmed reference
    return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
}
|
||||
|
||||
/**
 * Computes how many bases can be clipped from the right-hand end of every allele
 * simultaneously (i.e. a shared trailing match against the reference), given how many bases
 * have already been clipped from the front.
 *
 * @param unclippedAlleles the alleles to examine (symbolic alleles are skipped)
 * @param ref the reference bases to compare against
 * @param forwardClipping how many bases have already been removed from the front
 * @param allowFullClip whether an allele may be clipped down to zero length
 * @return the number of bases that can be removed from the end of every allele;
 *         -1 if the reference bases run out and full clipping is not allowed
 */
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
                                         final byte[] ref,
                                         final int forwardClipping,
                                         final boolean allowFullClip) {
    int clipping = 0;
    boolean stillClipping = true;

    // advance clipping one base at a time until some allele stops matching (or runs out)
    while ( stillClipping ) {
        for ( final Allele a : unclippedAlleles ) {
            if ( a.isSymbolic() )
                continue;

            // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
            // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
            if ( a.length() - clipping == 0 )
                return clipping - (allowFullClip ? 0 : 1);

            // stop when the remaining bases would collide with the forward-clipped region
            if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
                stillClipping = false;
            }
            else if ( ref.length == clipping ) {
                // reference bases exhausted: stop here, or signal failure if full clipping is disallowed
                if ( allowFullClip )
                    stillClipping = false;
                else
                    return -1;
            }
            else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
                // this allele's next base from the end disagrees with the reference
                stillClipping = false;
            }
        }
        // only count this position if every allele agreed it can be clipped
        if ( stillClipping )
            clipping++;
    }

    return clipping;
}
|
||||
|
||||
/**
|
||||
* A simple but common wrapper for matching VariantContext objects using JEXL expressions
|
||||
*/
|
||||
|
|
@ -602,9 +1028,10 @@ public class VariantContextUtils {
|
|||
if (vc.alleles.size() == 1)
|
||||
continue;
|
||||
if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) {
|
||||
if ( ! genotypes.isEmpty() )
|
||||
logger.debug(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s",
|
||||
vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.alleles));
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED && ! genotypes.isEmpty() ) {
|
||||
System.err.println(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s",
|
||||
vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.alleles));
|
||||
}
|
||||
genotypes = stripPLsAndAD(genotypes);
|
||||
// this will remove stale AC,AF attributed from vc
|
||||
calculateChromosomeCounts(vc, attributes, true);
|
||||
|
|
@ -635,7 +1062,7 @@ public class VariantContextUtils {
|
|||
for ( final VariantContext vc : VCs )
|
||||
if ( vc.isVariant() )
|
||||
s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
|
||||
setValue = Utils.join("-", s);
|
||||
setValue = GeneralUtils.join("-", s);
|
||||
}
|
||||
|
||||
if ( setKey != null ) {
|
||||
|
|
@ -649,7 +1076,7 @@ public class VariantContextUtils {
|
|||
if ( depth > 0 )
|
||||
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
|
||||
|
||||
final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs);
|
||||
final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : GeneralUtils.join(",", rsIDs);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID);
|
||||
builder.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd());
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.variant.variantcontext;
|
|||
|
||||
import org.apache.commons.jexl2.JexlContext;
|
||||
import org.apache.commons.jexl2.MapContext;
|
||||
import org.broadinstitute.variant.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
|
||||
import java.util.Collection;
|
||||
|
|
@ -168,7 +168,7 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
|||
infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual()));
|
||||
|
||||
// add alleles
|
||||
infoMap.put("ALLELES", Utils.join(";", vc.getAlleles()));
|
||||
infoMap.put("ALLELES", GeneralUtils.join(";", vc.getAlleles()));
|
||||
infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles()));
|
||||
|
||||
// add attributes
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.variant.variantcontext.writer;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
|
@ -40,7 +40,6 @@ import java.util.Map;
|
|||
* @since 06/12
|
||||
*/
|
||||
public class BCF2FieldWriterManager {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2FieldWriterManager.class);
|
||||
final Map<String, BCF2FieldWriter.SiteWriter> siteWriters = new HashMap<String, BCF2FieldWriter.SiteWriter>();
|
||||
final Map<String, BCF2FieldWriter.GenotypesWriter> genotypesWriters = new HashMap<String, BCF2FieldWriter.GenotypesWriter>();
|
||||
final IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
|
|
@ -98,8 +97,8 @@ public class BCF2FieldWriterManager {
|
|||
final boolean createGenotypesEncoders ) {
|
||||
|
||||
if ( createGenotypesEncoders && intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
|
||||
if ( line.getType() != VCFHeaderLineType.Integer )
|
||||
logger.warn("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED && line.getType() != VCFHeaderLineType.Integer )
|
||||
System.err.println("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
|
||||
return new BCF2FieldEncoder.IntArray(line, dict);
|
||||
} else if ( createGenotypesEncoders && line.getID().equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
return new BCF2FieldEncoder.GenericInts(line, dict);
|
||||
|
|
|
|||
|
|
@ -28,11 +28,11 @@ package org.broadinstitute.variant.variantcontext.writer;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Type;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.variant.bcf2.BCFVersion;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFContigHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
|
@ -89,7 +89,6 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
public static final int MAJOR_VERSION = 2;
|
||||
public static final int MINOR_VERSION = 1;
|
||||
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
|
||||
|
||||
private final OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
|
||||
|
|
@ -129,7 +128,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
// create the config offsets map
|
||||
if ( header.getContigLines().isEmpty() ) {
|
||||
if ( ALLOW_MISSING_CONTIG_LINES ) {
|
||||
logger.warn("No contig dictionary found in header, falling back to reference sequence dictionary");
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
|
||||
System.err.println("No contig dictionary found in header, falling back to reference sequence dictionary");
|
||||
}
|
||||
createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null));
|
||||
} else {
|
||||
throw new IllegalStateException("Cannot write BCF2 file with missing contig lines");
|
||||
|
|
@ -275,10 +276,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData &&
|
||||
canSafelyWriteRawGenotypesBytes((BCF2Codec.LazyData) lgc.getUnparsedGenotypeData())) {
|
||||
//logger.info("Passing on raw BCF2 genotypes data");
|
||||
return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
|
||||
} else {
|
||||
//logger.info("Decoding raw BCF2 genotypes data");
|
||||
lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.variant.vcf;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.AsciiFeatureCodec;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.NameAwareCodec;
|
||||
|
|
@ -33,6 +32,7 @@ import org.broad.tribble.TribbleException;
|
|||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broad.tribble.util.BlockCompressedInputStream;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
|
|
@ -46,7 +46,6 @@ import java.util.zip.GZIPInputStream;
|
|||
public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext> implements NameAwareCodec {
|
||||
public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
|
||||
|
||||
protected final static Logger log = Logger.getLogger(AbstractVCFCodec.class);
|
||||
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
|
||||
|
||||
// we have to store the list of strings that make up the header until they're needed
|
||||
|
|
@ -397,9 +396,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
key = infoFieldArray[i];
|
||||
final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
|
||||
if ( headerLine != null && headerLine.getType() != VCFHeaderLineType.Flag ) {
|
||||
if ( ! warnedAboutNoEqualsForNonFlag ) {
|
||||
log.warn("Found info key " + key + " without a = value, but the header says the field is of type "
|
||||
+ headerLine.getType() + " but this construct is only value for FLAG type fields");
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED && ! warnedAboutNoEqualsForNonFlag ) {
|
||||
System.err.println("Found info key " + key + " without a = value, but the header says the field is of type "
|
||||
+ headerLine.getType() + " but this construct is only value for FLAG type fields");
|
||||
warnedAboutNoEqualsForNonFlag = true;
|
||||
}
|
||||
|
||||
|
|
@ -517,8 +516,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
if ( allele == null || allele.length() == 0 )
|
||||
generateException("Empty alleles are not permitted in VCF records", lineNo);
|
||||
|
||||
if ( MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING )
|
||||
log.warn(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED && MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING ) {
|
||||
System.err.println(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
|
||||
}
|
||||
|
||||
if ( isSymbolicAllele(allele) ) {
|
||||
if ( isRef ) {
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.variant.vcf;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -38,7 +38,6 @@ import java.util.Map;
|
|||
* a base class for compound header lines, which include info lines and format lines (so far)
|
||||
*/
|
||||
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFIDHeaderLine {
|
||||
final protected static Logger logger = Logger.getLogger(VCFHeader.class);
|
||||
|
||||
public enum SupportedHeaderLineType {
|
||||
INFO(true), FORMAT(false);
|
||||
|
|
@ -197,7 +196,9 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
|
||||
if ( type == VCFHeaderLineType.Flag && count != 0 ) {
|
||||
count = 0;
|
||||
logger.warn("FLAG fields must have a count value of 0, but saw " + count + " for header line " + getID() + ". Changing it to 0 inside the code");
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
|
||||
System.err.println("FLAG fields must have a count value of 0, but saw " + count + " for header line " + getID() + ". Changing it to 0 inside the code");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,9 +25,9 @@
|
|||
|
||||
package org.broadinstitute.variant.vcf;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -45,7 +45,6 @@ import java.util.*;
|
|||
* A class representing the VCF header
|
||||
*/
|
||||
public class VCFHeader {
|
||||
final protected static Logger logger = Logger.getLogger(VCFHeader.class);
|
||||
|
||||
// the mandatory header fields
|
||||
public enum HEADER_FIELDS {
|
||||
|
|
@ -238,9 +237,11 @@ public class VCFHeader {
|
|||
}
|
||||
|
||||
if ( hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && ! hasFormatLine(VCFConstants.GENOTYPE_PL_KEY) ) {
|
||||
logger.warn("Found " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY + " format, but no "
|
||||
+ VCFConstants.GENOTYPE_PL_KEY + " field. We now only manage PL fields internally"
|
||||
+ " automatically adding a corresponding PL field to your VCF header");
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
|
||||
System.err.println("Found " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY + " format, but no "
|
||||
+ VCFConstants.GENOTYPE_PL_KEY + " field. We now only manage PL fields internally"
|
||||
+ " automatically adding a corresponding PL field to your VCF header");
|
||||
}
|
||||
addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||
}
|
||||
}
|
||||
|
|
@ -254,10 +255,14 @@ public class VCFHeader {
|
|||
*/
|
||||
private final <T extends VCFCompoundHeaderLine> void addMetaDataMapBinding(final Map<String, T> map, T line) {
|
||||
final String key = line.getID();
|
||||
if ( map.containsKey(key) )
|
||||
logger.debug("Found duplicate VCF header lines for " + key + "; keeping the first only" );
|
||||
else
|
||||
if ( map.containsKey(key) ) {
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
|
||||
System.err.println("Found duplicate VCF header lines for " + key + "; keeping the first only" );
|
||||
}
|
||||
}
|
||||
else {
|
||||
map.put(key, line);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@ package org.broadinstitute.variant.vcf;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -46,7 +46,6 @@ public class VCFStandardHeaderLines {
|
|||
* Enabling this causes us to repair header lines even if only their descriptions differ
|
||||
*/
|
||||
private final static boolean REPAIR_BAD_DESCRIPTIONS = false;
|
||||
protected final static Logger logger = Logger.getLogger(VCFStandardHeaderLines.class);
|
||||
private static Standards<VCFFormatHeaderLine> formatStandards = new Standards<VCFFormatHeaderLine>();
|
||||
private static Standards<VCFInfoHeaderLine> infoStandards = new Standards<VCFInfoHeaderLine>();
|
||||
|
||||
|
|
@ -216,11 +215,13 @@ public class VCFStandardHeaderLines {
|
|||
final boolean needsRepair = badCountType || badCount || badType || (REPAIR_BAD_DESCRIPTIONS && badDesc);
|
||||
|
||||
if ( needsRepair ) {
|
||||
logger.warn("Repairing standard header line for field " + line.getID() + " because"
|
||||
+ (badCountType ? " -- count types disagree; header has " + line.getCountType() + " but standard is " + standard.getCountType() : "")
|
||||
+ (badType ? " -- type disagree; header has " + line.getType() + " but standard is " + standard.getType() : "")
|
||||
+ (badCount ? " -- counts disagree; header has " + line.getCount() + " but standard is " + standard.getCount() : "")
|
||||
+ (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": ""));
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED ) {
|
||||
System.err.println("Repairing standard header line for field " + line.getID() + " because"
|
||||
+ (badCountType ? " -- count types disagree; header has " + line.getCountType() + " but standard is " + standard.getCountType() : "")
|
||||
+ (badType ? " -- type disagree; header has " + line.getType() + " but standard is " + standard.getType() : "")
|
||||
+ (badCount ? " -- counts disagree; header has " + line.getCount() + " but standard is " + standard.getCount() : "")
|
||||
+ (badDesc ? " -- descriptions disagree; header has '" + line.getDescription() + "' but standard is '" + standard.getDescription() + "'": ""));
|
||||
}
|
||||
return standard;
|
||||
} else
|
||||
return line;
|
||||
|
|
|
|||
|
|
@ -28,17 +28,22 @@ package org.broadinstitute.variant.vcf;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class VCFUtils {
|
||||
|
||||
public static Set<VCFHeaderLine> smartMergeHeaders(Collection<VCFHeader> headers, Logger logger) throws IllegalStateException {
|
||||
public static Set<VCFHeaderLine> smartMergeHeaders(Collection<VCFHeader> headers, boolean emitWarnings) throws IllegalStateException {
|
||||
HashMap<String, VCFHeaderLine> map = new HashMap<String, VCFHeaderLine>(); // from KEY.NAME -> line
|
||||
HeaderConflictWarner conflictWarner = new HeaderConflictWarner(logger);
|
||||
HeaderConflictWarner conflictWarner = new HeaderConflictWarner(emitWarnings);
|
||||
|
||||
// todo -- needs to remove all version headers from sources and add its own VCF version line
|
||||
for ( VCFHeader source : headers ) {
|
||||
|
|
@ -193,19 +198,48 @@ public class VCFUtils {
|
|||
return assembly;
|
||||
}
|
||||
|
||||
/** Only displays a warning if a logger is provided and an identical warning hasn't been already issued */
|
||||
/**
|
||||
* Read all of the VCF records from source into memory, returning the header and the VariantContexts
|
||||
*
|
||||
* @param source the file to read, must be in VCF4 format
|
||||
* @return
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
|
||||
// read in the features
|
||||
final List<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
|
||||
|
||||
while ( ! pbs.isDone() ) {
|
||||
final VariantContext vc = codec.decode(pbs);
|
||||
if ( vc != null )
|
||||
vcs.add(vc);
|
||||
}
|
||||
|
||||
return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
|
||||
}
|
||||
|
||||
/** Only displays a warning if warnings are enabled and an identical warning hasn't been already issued */
|
||||
private static final class HeaderConflictWarner {
|
||||
Logger logger;
|
||||
boolean emitWarnings;
|
||||
Set<String> alreadyIssued = new HashSet<String>();
|
||||
|
||||
private HeaderConflictWarner(final Logger logger) {
|
||||
this.logger = logger;
|
||||
private HeaderConflictWarner( final boolean emitWarnings ) {
|
||||
this.emitWarnings = emitWarnings;
|
||||
}
|
||||
|
||||
public void warn(final VCFHeaderLine line, final String msg) {
|
||||
if ( logger != null && ! alreadyIssued.contains(line.getKey()) ) {
|
||||
if ( GeneralUtils.DEBUG_MODE_ENABLED && emitWarnings && ! alreadyIssued.contains(line.getKey()) ) {
|
||||
alreadyIssued.add(line.getKey());
|
||||
logger.warn(msg);
|
||||
System.err.println(msg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,8 +35,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextTestProvider;
|
||||
|
|
|
|||
|
|
@ -25,11 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import cern.jet.math.Arithmetic;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
|
||||
import org.jgrapht.alg.StrongConnectivityInspector;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.Assert;
|
||||
|
|
|
|||
|
|
@ -35,8 +35,8 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextTestProvider;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
|
|
|
|||
|
|
@ -23,10 +23,9 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.variant.vcf;
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -23,13 +23,13 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.variant.variantcontext;
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import com.google.caliper.Param;
|
||||
import com.google.caliper.SimpleBenchmark;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -73,8 +73,6 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
MERGE
|
||||
}
|
||||
|
||||
private GenomeLocParser b37GenomeLocParser;
|
||||
|
||||
@Override protected void setUp() {
|
||||
// TODO -- update for new tribble interface
|
||||
// try {
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
package org.broadinstitute.variant;
|
||||
|
||||
import org.testng.Assert;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Base class for test classes within org.broadinstitute.variant
|
||||
*/
|
||||
public class VariantBaseTest {
|
||||
|
||||
public static final String hg19Reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta";
|
||||
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
|
||||
|
||||
// TODO: change this to an appropriate value once the move to the Picard repo takes place
|
||||
public static final String variantTestDataRoot = new File("private/testdata/").getAbsolutePath() + "/";
|
||||
|
||||
/**
|
||||
* Simple generic utility class to creating TestNG data providers:
|
||||
*
|
||||
* 1: inherit this class, as in
|
||||
*
|
||||
* private class SummarizeDifferenceTest extends TestDataProvider {
|
||||
* public SummarizeDifferenceTest() {
|
||||
* super(SummarizeDifferenceTest.class);
|
||||
* }
|
||||
* ...
|
||||
* }
|
||||
*
|
||||
* Provide a reference to your class to the TestDataProvider constructor.
|
||||
*
|
||||
* 2: Create instances of your subclass. Return from it the call to getTests, providing
|
||||
* the class type of your test
|
||||
*
|
||||
* @DataProvider(name = "summaries"
|
||||
* public Object[][] createSummaries() {
|
||||
* new SummarizeDifferenceTest().addDiff("A", "A").addSummary("A:2");
|
||||
* new SummarizeDifferenceTest().addDiff("A", "B").addSummary("A:1", "B:1");
|
||||
* return SummarizeDifferenceTest.getTests(SummarizeDifferenceTest.class);
|
||||
* }
|
||||
*
|
||||
* This class magically tracks created objects of this
|
||||
*/
|
||||
public static class TestDataProvider {
|
||||
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
|
||||
protected String name;
|
||||
|
||||
/**
|
||||
* Create a new TestDataProvider instance bound to the class variable C
|
||||
* @param c
|
||||
*/
|
||||
public TestDataProvider(Class c, String name) {
|
||||
if ( ! tests.containsKey(c) )
|
||||
tests.put(c, new ArrayList<Object>());
|
||||
tests.get(c).add(this);
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public TestDataProvider(Class c) {
|
||||
this(c, "");
|
||||
}
|
||||
|
||||
public void setName(final String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all of the data providers in the form expected by TestNG of type class C
|
||||
* @param c
|
||||
* @return
|
||||
*/
|
||||
public static Object[][] getTests(Class c) {
|
||||
List<Object[]> params2 = new ArrayList<Object[]>();
|
||||
for ( Object x : tests.get(c) ) params2.add(new Object[]{x});
|
||||
return params2.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TestDataProvider("+name+")";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a temp file that will be deleted on exit after tests are complete.
|
||||
* @param name Prefix of the file.
|
||||
* @param extension Extension to concat to the end of the file.
|
||||
* @return A file in the temporary directory starting with name, ending with extension, which will be deleted after the program exits.
|
||||
*/
|
||||
public static File createTempFile(String name, String extension) {
|
||||
try {
|
||||
File file = File.createTempFile(name, extension);
|
||||
file.deleteOnExit();
|
||||
return file;
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException("Cannot create temp file: " + ex.getMessage(), ex);
|
||||
}
|
||||
}
|
||||
|
||||
private static final double DEFAULT_FLOAT_TOLERANCE = 1e-1;
|
||||
|
||||
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected) {
|
||||
Assert.assertTrue(actual instanceof Double, "Not a double");
|
||||
assertEqualsDoubleSmart((double)(Double)actual, (double)expected);
|
||||
}
|
||||
|
||||
public static final void assertEqualsDoubleSmart(final Object actual, final Double expected, final double tolerance) {
|
||||
Assert.assertTrue(actual instanceof Double, "Not a double");
|
||||
assertEqualsDoubleSmart((double)(Double)actual, (double)expected, tolerance);
|
||||
}
|
||||
|
||||
public static final void assertEqualsDoubleSmart(final double actual, final double expected) {
|
||||
assertEqualsDoubleSmart(actual, expected, DEFAULT_FLOAT_TOLERANCE);
|
||||
}
|
||||
|
||||
public static final <T> void assertEqualsSet(final Set<T> actual, final Set<T> expected, final String info) {
|
||||
final Set<T> actualSet = new HashSet<T>(actual);
|
||||
final Set<T> expectedSet = new HashSet<T>(expected);
|
||||
Assert.assertTrue(actualSet.equals(expectedSet), info); // note this is necessary due to testng bug for set comps
|
||||
}
|
||||
|
||||
public static void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
|
||||
assertEqualsDoubleSmart(actual, expected, tolerance, null);
|
||||
}
|
||||
|
||||
public static void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance, final String message) {
|
||||
if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
|
||||
Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not");
|
||||
else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately
|
||||
Assert.assertTrue(Double.isInfinite(actual), "expected is infinite, actual is not");
|
||||
else {
|
||||
final double delta = Math.abs(actual - expected);
|
||||
final double ratio = Math.abs(actual / expected - 1.0);
|
||||
Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual
|
||||
+ " not within tolerance " + tolerance
|
||||
+ (message == null ? "" : "message: " + message));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -25,12 +25,9 @@
|
|||
|
||||
package org.broadinstitute.variant.bcf2;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.variantcontext.writer.BCF2Encoder;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
|
|
@ -47,7 +44,7 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
|
||||
|
||||
public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
||||
public class BCF2EncoderDecoderUnitTest extends VariantBaseTest {
|
||||
private final double FLOAT_TOLERANCE = 1e-6;
|
||||
final List<BCF2TypedValue> primitives = new ArrayList<BCF2TypedValue>();
|
||||
final List<BCF2TypedValue> basicTypes = new ArrayList<BCF2TypedValue>();
|
||||
|
|
@ -561,7 +558,7 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
final double valueFloat = (Double)tv.value;
|
||||
final double decodedFloat = (Double)decoded;
|
||||
|
||||
BaseTest.assertEqualsDoubleSmart(decodedFloat, valueFloat, FLOAT_TOLERANCE);
|
||||
VariantBaseTest.assertEqualsDoubleSmart(decodedFloat, valueFloat, FLOAT_TOLERANCE);
|
||||
} else
|
||||
Assert.assertEquals(decoded, tv.value);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.variant.bcf2;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -37,7 +37,7 @@ import org.testng.annotations.Test;
|
|||
/**
|
||||
* Tests for BCF2Utils
|
||||
*/
|
||||
public final class BCF2UtilsUnitTest extends BaseTest {
|
||||
public final class BCF2UtilsUnitTest extends VariantBaseTest {
|
||||
@DataProvider(name = "CollapseExpandTest")
|
||||
public Object[][] makeCollapseExpandTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
|
@ -87,7 +87,7 @@ public final class BCF2UtilsUnitTest extends BaseTest {
|
|||
final List<VCFHeaderLine> empty = Collections.emptyList();
|
||||
final List<List<VCFHeaderLine>> permutations = extrasToTake == 0
|
||||
? Collections.singletonList(empty)
|
||||
: Utils.makePermutations(extraLines, extrasToTake, false);
|
||||
: GeneralUtils.makePermutations(extraLines, extrasToTake, false);
|
||||
for ( final List<VCFHeaderLine> permutation : permutations ) {
|
||||
for ( int i = -1; i < inputLines.size(); i++ ) {
|
||||
final List<VCFHeaderLine> allLines = new ArrayList<VCFHeaderLine>(inputLines);
|
||||
|
|
@ -113,7 +113,7 @@ public final class BCF2UtilsUnitTest extends BaseTest {
|
|||
|
||||
final List<List<String>> permutations = testSamples.isEmpty()
|
||||
? Collections.singletonList(testSamples)
|
||||
: Utils.makePermutations(testSamples, testSamples.size(), false);
|
||||
: GeneralUtils.makePermutations(testSamples, testSamples.size(), false);
|
||||
for ( final List<String> testSamplesPermutation : permutations ) {
|
||||
final VCFHeader testHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), testSamplesPermutation);
|
||||
final boolean expectedConsistent = testSamples.equals(inSamples);
|
||||
|
|
|
|||
|
|
@ -25,21 +25,18 @@
|
|||
|
||||
package org.broadinstitute.variant.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
||||
|
||||
public class BaseUtilsUnitTest extends BaseTest {
|
||||
public class BaseUtilsUnitTest extends VariantBaseTest {
|
||||
@BeforeClass
|
||||
public void init() { }
|
||||
|
||||
@Test
|
||||
public void testMostFrequentBaseFraction() {
|
||||
logger.warn("Executing testMostFrequentBaseFraction");
|
||||
|
||||
compareFrequentBaseFractionToExpected("AAAAA", 1.0);
|
||||
compareFrequentBaseFractionToExpected("ACCG", 0.5);
|
||||
compareFrequentBaseFractionToExpected("ACCCCTTTTG", 4.0/10.0);
|
||||
|
|
@ -47,7 +44,7 @@ public class BaseUtilsUnitTest extends BaseTest {
|
|||
|
||||
private void compareFrequentBaseFractionToExpected(String sequence, double expected) {
|
||||
double fraction = BaseUtils.mostFrequentBaseFraction(sequence.getBytes());
|
||||
Assert.assertTrue(MathUtils.compareDoubles(fraction, expected) == 0);
|
||||
Assert.assertTrue(GeneralUtils.compareDoubles(fraction, expected) == 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -67,8 +64,6 @@ public class BaseUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testTransitionTransversion() {
|
||||
logger.warn("Executing testTransitionTransversion");
|
||||
|
||||
Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'T' ) == BaseUtils.BaseSubstitutionType.TRANSVERSION );
|
||||
Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'C' ) == BaseUtils.BaseSubstitutionType.TRANSVERSION );
|
||||
Assert.assertTrue( BaseUtils.SNPSubstitutionType( (byte)'A', (byte)'G' ) == BaseUtils.BaseSubstitutionType.TRANSITION );
|
||||
|
|
@ -94,8 +89,6 @@ public class BaseUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testReverseComplementString() {
|
||||
logger.warn("Executing testReverseComplementString");
|
||||
|
||||
compareRCStringToExpected("ACGGT", "ACCGT");
|
||||
compareRCStringToExpected("TCGTATATCTCGCTATATATATATAGCTCTAGTATA", "TATACTAGAGCTATATATATATAGCGAGATATACGA");
|
||||
compareRCStringToExpected("AAAN", "NTTT");
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.variant.variantcontext;
|
|||
|
||||
// the imports for unit testing.
|
||||
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -44,7 +45,7 @@ import org.testng.annotations.Test;
|
|||
/**
|
||||
* Basic unit test for RecalData
|
||||
*/
|
||||
public class AlleleUnitTest {
|
||||
public class AlleleUnitTest extends VariantBaseTest {
|
||||
Allele ARef, A, T, ATIns, ATCIns, NoCall;
|
||||
|
||||
@BeforeSuite
|
||||
|
|
|
|||
|
|
@ -30,9 +30,9 @@ package org.broadinstitute.variant.variantcontext;
|
|||
|
||||
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -44,7 +44,7 @@ import java.util.List;
|
|||
/**
|
||||
* Basic unit test for Genotype likelihoods objects
|
||||
*/
|
||||
public class GenotypeLikelihoodsUnitTest {
|
||||
public class GenotypeLikelihoodsUnitTest extends VariantBaseTest {
|
||||
double [] v = new double[]{-10.5, -1.25, -5.11};
|
||||
final static String vGLString = "-10.50,-1.25,-5.11";
|
||||
final static String vPLString = "93,0,39";
|
||||
|
|
@ -88,7 +88,7 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
|
||||
//Linear scale
|
||||
glMap = gl.getAsMap(true);
|
||||
double [] vl = MathUtils.normalizeFromLog10(v);
|
||||
double [] vl = GeneralUtils.normalizeFromLog10(v);
|
||||
Assert.assertEquals(vl[GenotypeType.HOM_REF.ordinal()-1],glMap.get(GenotypeType.HOM_REF));
|
||||
Assert.assertEquals(vl[GenotypeType.HET.ordinal()-1],glMap.get(GenotypeType.HET));
|
||||
Assert.assertEquals(vl[GenotypeType.HOM_VAR.ordinal()-1],glMap.get(GenotypeType.HOM_VAR));
|
||||
|
|
@ -118,7 +118,7 @@ public class GenotypeLikelihoodsUnitTest {
|
|||
//GQ for the best guess genotype
|
||||
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HET),-3.9);
|
||||
|
||||
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
|
||||
double[] test = GeneralUtils.normalizeFromLog10(gl.getAsVector());
|
||||
|
||||
//GQ for the other genotypes
|
||||
Assert.assertEquals(gl.getLog10GQ(GenotypeType.HOM_REF), Math.log10(1.0 - test[GenotypeType.HOM_REF.ordinal()-1]));
|
||||
|
|
|
|||
|
|
@ -29,13 +29,13 @@ package org.broadinstitute.variant.variantcontext;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
||||
public class GenotypeUnitTest extends BaseTest {
|
||||
public class GenotypeUnitTest extends VariantBaseTest {
|
||||
Allele A, Aref, T;
|
||||
|
||||
@BeforeSuite
|
||||
|
|
|
|||
|
|
@ -30,8 +30,8 @@ package org.broadinstitute.variant.variantcontext;
|
|||
|
||||
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -40,7 +40,7 @@ import org.testng.annotations.Test;
|
|||
import java.util.*;
|
||||
|
||||
|
||||
public class GenotypesContextUnitTest extends BaseTest {
|
||||
public class GenotypesContextUnitTest extends VariantBaseTest {
|
||||
Allele Aref, C, T;
|
||||
Genotype AA, AT, TT, AC, CT, CC, MISSING;
|
||||
List<Genotype> allGenotypes;
|
||||
|
|
@ -128,7 +128,7 @@ public class GenotypesContextUnitTest extends BaseTest {
|
|||
// sorted
|
||||
new GenotypesContextProvider(maker, samples);
|
||||
// unsorted
|
||||
new GenotypesContextProvider(maker, Utils.reverse(samples));
|
||||
new GenotypesContextProvider(maker, GeneralUtils.reverse(samples));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,15 +25,14 @@
|
|||
|
||||
package org.broadinstitute.variant.variantcontext;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.writer.Options;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -50,8 +49,6 @@ import java.util.*;
|
|||
* @since Date created
|
||||
*/
|
||||
public class VariantContextTestProvider {
|
||||
final protected static Logger logger = Logger.getLogger(VariantContextTestProvider.class);
|
||||
|
||||
final private static boolean ENABLE_GENOTYPE_TESTS = true;
|
||||
final private static boolean ENABLE_A_AND_G_TESTS = true;
|
||||
final private static boolean ENABLE_VARARRAY_TESTS = true;
|
||||
|
|
@ -68,12 +65,12 @@ public class VariantContextTestProvider {
|
|||
|
||||
private final static List<File> testSourceVCFs = new ArrayList<File>();
|
||||
static {
|
||||
testSourceVCFs.add(new File(BaseTest.privateTestDir + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"));
|
||||
testSourceVCFs.add(new File(BaseTest.privateTestDir + "ex2.vcf"));
|
||||
testSourceVCFs.add(new File(BaseTest.privateTestDir + "dbsnp_135.b37.1000.vcf"));
|
||||
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"));
|
||||
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "ex2.vcf"));
|
||||
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "dbsnp_135.b37.1000.vcf"));
|
||||
if ( ENABLE_SYMBOLIC_ALLELE_TESTS ) {
|
||||
testSourceVCFs.add(new File(BaseTest.privateTestDir + "diagnosis_targets_testfile.vcf"));
|
||||
testSourceVCFs.add(new File(BaseTest.privateTestDir + "VQSR.mixedTest.recal"));
|
||||
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "diagnosis_targets_testfile.vcf"));
|
||||
testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "VQSR.mixedTest.recal"));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -156,12 +153,10 @@ public class VariantContextTestProvider {
|
|||
Pair<VCFHeader, Iterable<VariantContext>> x = readAllVCs( file, codec );
|
||||
List<VariantContext> fullyDecoded = new ArrayList<VariantContext>();
|
||||
|
||||
logger.warn("Reading records from " + file);
|
||||
for ( final VariantContext raw : x.getSecond() ) {
|
||||
if ( raw != null )
|
||||
fullyDecoded.add(raw.fullyDecode(x.getFirst(), false));
|
||||
}
|
||||
logger.warn("Done reading " + file);
|
||||
|
||||
TEST_DATAs.add(new VariantContextTestData(x.getFirst(), fullyDecoded));
|
||||
}
|
||||
|
|
@ -788,12 +783,12 @@ public class VariantContextTestProvider {
|
|||
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
|
||||
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
|
||||
Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered");
|
||||
BaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
|
||||
BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());
|
||||
VariantBaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
|
||||
VariantBaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());
|
||||
|
||||
Assert.assertEquals(actual.hasGenotypes(), expected.hasGenotypes(), "hasGenotypes");
|
||||
if ( expected.hasGenotypes() ) {
|
||||
BaseTest.assertEqualsSet(actual.getSampleNames(), expected.getSampleNames(), "sample names set");
|
||||
VariantBaseTest.assertEqualsSet(actual.getSampleNames(), expected.getSampleNames(), "sample names set");
|
||||
Assert.assertEquals(actual.getSampleNamesOrderedByName(), expected.getSampleNamesOrderedByName(), "sample names");
|
||||
final Set<String> samples = expected.getSampleNames();
|
||||
for ( final String sample : samples ) {
|
||||
|
|
@ -879,7 +874,7 @@ public class VariantContextTestProvider {
|
|||
private static void assertAttributeEquals(final String key, final Object actual, final Object expected) {
|
||||
if ( expected instanceof Double ) {
|
||||
// must be very tolerant because doubles are being rounded to 2 sig figs
|
||||
BaseTest.assertEqualsDoubleSmart(actual, (Double)expected, 1e-2);
|
||||
VariantBaseTest.assertEqualsDoubleSmart(actual, (Double)expected, 1e-2);
|
||||
} else
|
||||
Assert.assertEquals(actual, expected, "Attribute " + key);
|
||||
}
|
||||
|
|
@ -935,7 +930,7 @@ public class VariantContextTestProvider {
|
|||
}
|
||||
|
||||
private static List<List<Allele>> makeAllGenotypes(final List<Allele> alleles, final int highestPloidy) {
|
||||
return Utils.makePermutations(alleles, highestPloidy, true);
|
||||
return GeneralUtils.makePermutations(alleles, highestPloidy, true);
|
||||
}
|
||||
|
||||
public static void assertEquals(final VCFHeader actual, final VCFHeader expected) {
|
||||
|
|
|
|||
|
|
@ -29,9 +29,8 @@ package org.broadinstitute.variant.variantcontext;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -41,7 +40,7 @@ import org.testng.Assert;
|
|||
import java.util.*;
|
||||
|
||||
|
||||
public class VariantContextUnitTest extends BaseTest {
|
||||
public class VariantContextUnitTest extends VariantBaseTest {
|
||||
Allele A, Aref, C, T, Tref;
|
||||
Allele del, delRef, ATC, ATCref;
|
||||
|
||||
|
|
@ -500,30 +499,30 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
Pair<List<Integer>,byte[]> result;
|
||||
byte[] refBytes = "TATCATCATCGGA".getBytes();
|
||||
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes()),4);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes()),0);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes()),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes()),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes()),2);
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes()),4);
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes()),0);
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes()),1);
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes()),1);
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes()),2);
|
||||
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
|
||||
|
||||
|
||||
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
|
||||
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],4);
|
||||
Assert.assertEquals(result.getSecond().length,3);
|
||||
|
||||
// ATC*,A,ATCATC
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],2);
|
||||
Assert.assertEquals(result.getFirst().toArray()[2],4);
|
||||
|
|
@ -532,7 +531,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
// simple non-tandem deletion: CCCC*, -
|
||||
refBytes = "TCCCCCCCCATG".getBytes();
|
||||
vc = new VariantContextBuilder("foo", delLoc, 10, 14, Arrays.asList(ccccR,nullA)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],8);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],4);
|
||||
Assert.assertEquals(result.getSecond().length,1);
|
||||
|
|
@ -540,7 +539,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
|
||||
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],7);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],5);
|
||||
Assert.assertEquals(result.getFirst().toArray()[2],3);
|
||||
|
|
@ -550,7 +549,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
// GAGA*,-,GAGAGAGA
|
||||
refBytes = "TGAGAGAGAGATTT".getBytes();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],5);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[2],7);
|
||||
|
|
|
|||
|
|
@ -26,12 +26,8 @@
|
|||
package org.broadinstitute.variant.variantcontext;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -41,19 +37,17 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
public class VariantContextUtilsUnitTest extends BaseTest {
|
||||
public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
||||
Allele Aref, T, C, G, Cref, ATC, ATCATC;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeSuite
|
||||
public void setup() {
|
||||
final File referenceFile = new File(b37KGReference);
|
||||
try {
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
genomeLocParser = new GenomeLocParser(seq);
|
||||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile);
|
||||
}
|
||||
catch(FileNotFoundException ex) {
|
||||
throw new UserException.CouldNotReadInputFile(referenceFile,ex);
|
||||
throw new RuntimeException(referenceFile.getAbsolutePath(),ex);
|
||||
}
|
||||
|
||||
// alleles
|
||||
|
|
@ -658,7 +652,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
public void testRepeatDetectorTest(RepeatDetectorTest cfg) {
|
||||
|
||||
// test alleles are equal
|
||||
Assert.assertEquals(GATKVariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
|
||||
Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
@ -704,7 +698,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||
int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
Assert.assertEquals(result, cfg.expectedClip);
|
||||
}
|
||||
|
||||
|
|
@ -782,7 +776,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test(dataProvider = "SplitBiallelics")
|
||||
public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
|
||||
final List<VariantContext> biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vc);
|
||||
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc);
|
||||
Assert.assertEquals(biallelics.size(), expectedBiallelics.size());
|
||||
for ( int i = 0; i < biallelics.size(); i++ ) {
|
||||
final VariantContext actual = biallelics.get(i);
|
||||
|
|
@ -796,14 +790,14 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
|
||||
int sampleI = 0;
|
||||
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
|
||||
for ( final List<Allele> alleles : GeneralUtils.makePermutations(vc.getAlleles(), 2, true) ) {
|
||||
genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
|
||||
}
|
||||
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
|
||||
|
||||
final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make();
|
||||
|
||||
final List<VariantContext> biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
|
||||
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
|
||||
for ( int i = 0; i < biallelics.size(); i++ ) {
|
||||
final VariantContext actual = biallelics.get(i);
|
||||
Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples
|
||||
|
|
|
|||
|
|
@ -25,13 +25,8 @@
|
|||
|
||||
package org.broadinstitute.variant.variantcontext;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -49,8 +44,7 @@ import java.util.Map;
|
|||
*
|
||||
* Test out parts of the VariantJEXLContext
|
||||
*/
|
||||
public class VariantJEXLContextUnitTest extends BaseTest {
|
||||
|
||||
public class VariantJEXLContextUnitTest extends VariantBaseTest {
|
||||
|
||||
private static String expression = "QUAL > 500.0";
|
||||
private static VariantContextUtils.JexlVCMatchExp exp;
|
||||
|
|
@ -60,27 +54,15 @@ public class VariantJEXLContextUnitTest extends BaseTest {
|
|||
Allele ATC, ATCref;
|
||||
// A [ref] / T at 10
|
||||
|
||||
GenomeLoc snpLoc;
|
||||
// - / ATC [ref] from 20-23
|
||||
|
||||
private static int startingChr = 1;
|
||||
private static int endingChr = 2;
|
||||
private static int readCount = 100;
|
||||
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
|
||||
static SAMFileHeader header;
|
||||
|
||||
private static GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(( endingChr - startingChr ) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
try {
|
||||
exp = new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.createExpression(expression));
|
||||
} catch (Exception e) {
|
||||
Assert.fail("Unable to create expression" + e.getMessage());
|
||||
}
|
||||
snpLoc = genomeLocParser.createGenomeLoc("chr1", 10, 10, true);
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
|
|
@ -142,9 +124,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
|
|||
private JEXLMap getVarContext() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T);
|
||||
|
||||
VariantContext vc = new VariantContextBuilder("test", snpLoc.getContig(), snpLoc.getStart(), snpLoc.getStop(), alleles).make();
|
||||
VariantContext vc = new VariantContextBuilder("test", "chr1", 10, 10, alleles).make();
|
||||
return new JEXLMap(Arrays.asList(exp),vc);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,16 +29,11 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|||
import org.broad.tribble.AbstractFeatureReader;
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderVersion;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
|
@ -58,22 +53,20 @@ import java.util.*;
|
|||
* <p/>
|
||||
* This class tests out the ability of the VCF writer to correctly write VCF files
|
||||
*/
|
||||
public class VCFWriterUnitTest extends BaseTest {
|
||||
public class VCFWriterUnitTest extends VariantBaseTest {
|
||||
private Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
|
||||
private Set<String> additionalColumns = new HashSet<String>();
|
||||
private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeTests() {
|
||||
File referenceFile = new File(hg18Reference);
|
||||
File referenceFile = new File(hg19Reference);
|
||||
try {
|
||||
seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
genomeLocParser = new GenomeLocParser(seq);
|
||||
seq = new IndexedFastaSequenceFile(referenceFile);
|
||||
}
|
||||
catch(FileNotFoundException ex) {
|
||||
throw new UserException.CouldNotReadInputFile(referenceFile,ex);
|
||||
throw new RuntimeException(referenceFile.getAbsolutePath(), ex);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -108,7 +101,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
fakeVCFFile.delete();
|
||||
}
|
||||
catch (IOException e ) {
|
||||
throw new ReviewedStingException(e.getMessage());
|
||||
throw new RuntimeException(e.getMessage());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -133,8 +126,6 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
* @return a VCFRecord
|
||||
*/
|
||||
private VariantContext createVC(VCFHeader header) {
|
||||
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1",1);
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Set<String> filters = null;
|
||||
Map<String, Object> attributes = new HashMap<String,Object>();
|
||||
|
|
@ -148,7 +139,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
Genotype gt = new GenotypeBuilder(name,alleles.subList(1,2)).GQ(0).attribute("BB", "1").phased(true).make();
|
||||
genotypes.add(gt);
|
||||
}
|
||||
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
||||
return new VariantContextBuilder("RANDOM", "chr1", 1, 1, alleles)
|
||||
.genotypes(genotypes).attributes(attributes).make();
|
||||
}
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue