Move some VCF/VariantContext methods back to the GATK based on feedback
-Moved some of the more specialized / complex VariantContext and VCF utility methods back to the GATK. -Due to this re-shuffling, was able to return things like the Pair class back to the GATK as well.
This commit is contained in:
parent
581df64197
commit
a536e1da84
|
|
@ -57,7 +57,7 @@ import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
|||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
|
|
|||
|
|
@ -53,8 +53,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -79,7 +79,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
|||
if ( !vc.isIndel())
|
||||
return null;
|
||||
|
||||
Pair<List<Integer>,byte[]> result = VariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
|
||||
Pair<List<Integer>,byte[]> result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
|
||||
if (result == null)
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
|
|
|
|||
|
|
@ -53,7 +53,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -288,7 +289,7 @@ public class ConsensusAlleleCounter {
|
|||
if (vcs.isEmpty())
|
||||
return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion
|
||||
|
||||
final VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
|
||||
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(vcs, null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
|
||||
return mergedVC.getAlleles();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
|
|||
|
|
@ -49,8 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
|
||||
|
||||
|
|
@ -172,7 +172,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
|
||||
*/
|
||||
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
|
||||
public int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY;
|
||||
public int samplePloidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false)
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -304,7 +304,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
|
||||
|
||||
// add the pool values for each genotype
|
||||
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
if (UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY) {
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -134,7 +135,7 @@ public class UnifiedGenotyperEngine {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Requires({"toolkit != null", "UAC != null"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY);
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
}
|
||||
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
|
||||
|
|
@ -525,7 +526,7 @@ public class UnifiedGenotyperEngine {
|
|||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
|
||||
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
|
||||
if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
|
|
@ -662,7 +663,7 @@ public class UnifiedGenotyperEngine {
|
|||
private void determineGLModelsToUse() {
|
||||
|
||||
String modelPrefix = "";
|
||||
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY )
|
||||
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY )
|
||||
modelPrefix = GPSTRING;
|
||||
|
||||
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -105,7 +106,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
return builder.make();
|
||||
} else {
|
||||
return vc;
|
||||
|
|
@ -351,6 +352,6 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes,
|
||||
final int ploidy) {
|
||||
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,10 +47,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
|
@ -92,7 +92,7 @@ abstract class ExactAFCalc extends AFCalc {
|
|||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
genotypeLikelihoods.add(gls);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
@ -553,7 +554,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
}
|
||||
|
||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||
if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) {
|
||||
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
|
||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
|
||||
}
|
||||
else {
|
||||
|
|
@ -565,7 +566,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
gb.PL(newLikelihoods);
|
||||
|
||||
// if we weren't asked to assign a genotype, then just no-call the sample
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
gb.alleles(NO_CALL_ALLELES);
|
||||
else
|
||||
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
|
|
@ -173,7 +174,7 @@ public class GenotypingEngine {
|
|||
validatePriorityList( priorityList, eventsAtThisLoc );
|
||||
|
||||
// Merge the event to find a common reference representation
|
||||
final VariantContext mergedVC = VariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||
if( mergedVC == null ) { continue; }
|
||||
|
||||
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
|
||||
|
|
@ -203,7 +204,7 @@ public class GenotypingEngine {
|
|||
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
|
||||
|
||||
if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||
annotatedCall = VariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
}
|
||||
|
||||
returnCalls.add( annotatedCall );
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
|||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
@ -297,7 +298,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
samplesList.addAll( samples );
|
||||
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
|
||||
|
|
@ -307,7 +308,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
simpleUAC.CONTAMINATION_FRACTION = 0.0;
|
||||
simpleUAC.exactCallsLog = null;
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// initialize the output VCF header
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
|
|
@ -444,7 +445,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
ArrayList<String> rodNames = new ArrayList<String>();
|
||||
rodNames.add(variantCollection.variants.getName());
|
||||
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
|
||||
//Get the trios from the families passed as ped
|
||||
setTrios();
|
||||
|
|
|
|||
|
|
@ -58,12 +58,12 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.VCFUtils;
|
||||
|
||||
|
|
@ -327,7 +327,7 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
// Initialize VCF header
|
||||
if (vcfWriter != null) {
|
||||
Map<String, VCFHeader> header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName());
|
||||
samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
samples = SampleUtils.getSampleList(header, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), true);
|
||||
headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||
|
|
|
|||
|
|
@ -54,12 +54,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -227,7 +227,7 @@ public class ValidationSiteSelector extends RodWalker<Integer, Integer> {
|
|||
public void initialize() {
|
||||
// Get list of samples to include in the output
|
||||
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit());
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
|
||||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
|
@ -115,7 +116,7 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
|
|||
|
||||
String trackName = variantCollection.variants.getName();
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import com.google.java.contract.Requires;
|
|||
import org.apache.commons.math.MathException;
|
||||
import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Collection;
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
|||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@ import com.google.java.contract.Requires;
|
|||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -112,7 +112,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
|
|||
|
||||
// get backward repeat unit and # repeats
|
||||
byte[] backwardRepeatUnit = Arrays.copyOfRange(readBases, offset - str + 1, offset + 1);
|
||||
maxBW = VariantContextUtils.findNumberofRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxBW = GATKVariantContextUtils.findNumberofRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
if (maxBW > 1) {
|
||||
bestBWRepeatUnit = backwardRepeatUnit.clone();
|
||||
break;
|
||||
|
|
@ -132,7 +132,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
|
|||
|
||||
// get forward repeat unit and # repeats
|
||||
byte[] forwardRepeatUnit = Arrays.copyOfRange(readBases, offset +1, offset+str+1);
|
||||
maxFW = VariantContextUtils.findNumberofRepetitions(forwardRepeatUnit,Arrays.copyOfRange(readBases, offset+1, readBases.length), true);
|
||||
maxFW = GATKVariantContextUtils.findNumberofRepetitions(forwardRepeatUnit, Arrays.copyOfRange(readBases, offset + 1, readBases.length), true);
|
||||
if (maxFW > 1) {
|
||||
bestFWRepeatUnit = forwardRepeatUnit.clone();
|
||||
break;
|
||||
|
|
@ -150,7 +150,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
|
|||
// but correct representation at that place might be (C)4.
|
||||
// Hence, if the FW and BW units don't match, check if BW unit can still be a part of FW unit and add
|
||||
// representations to total
|
||||
maxBW = VariantContextUtils.findNumberofRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxBW = GATKVariantContextUtils.findNumberofRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxRL = maxFW + maxBW;
|
||||
bestRepeatUnit = bestFWRepeatUnit;
|
||||
|
||||
|
|
|
|||
|
|
@ -48,18 +48,6 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
public class RepeatUnitAndLengthCovariate extends RepeatCovariate {
|
||||
|
|
|
|||
|
|
@ -46,20 +46,6 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.testng.Assert;
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
|
|||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
|
|
|||
|
|
@ -46,21 +46,17 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.*;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
|
@ -89,38 +85,38 @@ public class RepeatCovariatesUnitTest {
|
|||
@Test(enabled = true)
|
||||
public void testFindNumberOfRepetitions() {
|
||||
// First, test logic to compute number of repetitions of a substring on a given string.
|
||||
int result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), true);
|
||||
int result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), true);
|
||||
Assert.assertEquals(2,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACAC".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), true);
|
||||
Assert.assertEquals(4,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACACGT".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), true);
|
||||
Assert.assertEquals(4,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"GTACACACAC".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGT".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GCAGCAGTAGGGTGTACACACAC".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), true);
|
||||
Assert.assertEquals(1,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GTAGGGTGTACACACACGCAGCAT".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
// Same tests but looking backward on string
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), false);
|
||||
Assert.assertEquals(2,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), false);
|
||||
Assert.assertEquals(4,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACACGT".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"GTACACACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), false);
|
||||
Assert.assertEquals(4,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGT".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GCAGCAGTAGGGTGTACACACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GTAGGGTGTACACACACGCAGCAT".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false);
|
||||
Assert.assertEquals(3,result);
|
||||
|
||||
// test logic to get repeat unit and number of repeats from covariate value
|
||||
|
|
@ -208,8 +204,8 @@ public class RepeatCovariatesUnitTest {
|
|||
Assert.assertEquals(rurlValM,rurlValI);
|
||||
|
||||
|
||||
int fw = VariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(offset+1,readLength).getBytes(),true);
|
||||
int bw = VariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(0,offset+1).getBytes(),false);
|
||||
int fw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(offset+1,readLength).getBytes(),true);
|
||||
int bw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(0,offset+1).getBytes(),false);
|
||||
Assert.assertEquals(Math.min(fw+bw,RepeatCovariate.MAX_REPEAT_LENGTH),(int)Integer.valueOf(rlValM));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -200,7 +201,7 @@ public class VariantContextAdaptors {
|
|||
if ( isSNP(dbsnp) || isMNP(dbsnp) )
|
||||
addPaddingBase = false;
|
||||
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
|
||||
addPaddingBase = refBaseIsDash || VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
|
||||
addPaddingBase = refBaseIsDash || GATKVariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
|
||||
else
|
||||
return null; // can't handle anything else
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
|||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.file.FSLockWithShared;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.samples.Sample;
|
|||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.recalibration.BQSRMode;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
@ -249,7 +250,7 @@ public class VariantAnnotatorEngine {
|
|||
private VariantContext annotateDBs(final RefMetaDataTracker tracker, final GenomeLoc loc, VariantContext vc, final Map<String, Object> infoAnnotations) {
|
||||
for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
|
||||
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
|
||||
final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType());
|
||||
final String rsID = GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType());
|
||||
|
||||
// add the ID if appropriate
|
||||
if ( rsID != null ) {
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.util.Collections;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.clipping.ClippingOp;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.VariantEvalUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -197,7 +198,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||
|
||||
@Argument(shortName="ploidy", fullName="samplePloidy", doc="Per-sample ploidy (number of chromosomes per sample)", required=false)
|
||||
protected int ploidy = VariantContextUtils.DEFAULT_PLOIDY;
|
||||
protected int ploidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
|
||||
|
||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||
private File ancestralAlignmentsFile = null;
|
||||
|
|
@ -285,7 +286,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
|
||||
// Now that we have all the rods categorized, determine the sample list from the eval rods.
|
||||
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), evals);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
|
||||
// Load the sample list, using an intermediate tree set to sort the samples
|
||||
final Set<String> allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples);
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -51,7 +51,7 @@ public class TandemRepeat extends VariantStratifier {
|
|||
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
if ( eval == null || ! eval.isIndel() )
|
||||
return ALL;
|
||||
else if ( VariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
|
||||
else if ( GATKVariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
|
||||
print("REPEAT", eval, ref);
|
||||
return REPEAT;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.Window;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -135,14 +136,14 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
protected VariantContextWriter vcfWriter = null;
|
||||
|
||||
@Argument(shortName="genotypeMergeOptions", doc="Determines how we should merge genotype records for samples shared across the ROD files", required=false)
|
||||
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = null;
|
||||
public GATKVariantContextUtils.GenotypeMergeType genotypeMergeOption = null;
|
||||
|
||||
@Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false)
|
||||
public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
|
||||
public GATKVariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName="multipleAllelesMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required=false)
|
||||
public VariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = VariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
|
||||
public GATKVariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
|
||||
|
||||
/**
|
||||
* Used when taking the union of variants that contain genotypes. A complete priority list MUST be provided.
|
||||
|
|
@ -203,12 +204,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
|
||||
validateAnnotateUnionArguments();
|
||||
if ( PRIORITY_STRING == null && genotypeMergeOption == null) {
|
||||
genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED;
|
||||
genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED;
|
||||
//PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12)
|
||||
logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority);
|
||||
}
|
||||
|
||||
if (genotypeMergeOption == VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE &&
|
||||
if (genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE &&
|
||||
!SampleUtils.verifyUniqueSamplesNames(vcfRods))
|
||||
throw new IllegalStateException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered.");
|
||||
|
||||
|
|
@ -232,7 +233,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
private void validateAnnotateUnionArguments() {
|
||||
Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null);
|
||||
|
||||
if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
|
||||
if ( genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
|
||||
throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes");
|
||||
|
||||
if ( PRIORITY_STRING != null){
|
||||
|
|
@ -278,7 +279,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
|
||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||
|
||||
if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
|
||||
if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||
|
||||
// TODO -- clean this up in a refactoring
|
||||
|
|
@ -296,13 +297,13 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
// iterate over the types so that it's deterministic
|
||||
for (VariantContext.Type type : VariantContext.Type.values()) {
|
||||
if (VCsByType.containsKey(type))
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(VCsByType.get(type),
|
||||
priority, rodNames.size() , filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
mergedVCs.add(GATKVariantContextUtils.simpleMerge(VCsByType.get(type),
|
||||
priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
}
|
||||
else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(vcs,
|
||||
else if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
|
||||
mergedVCs.add(GATKVariantContextUtils.simpleMerge(vcs,
|
||||
priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
|
|
|||
|
|
@ -39,12 +39,12 @@ import org.broadinstitute.sting.utils.SampleUtils;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -204,7 +204,7 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
|
|||
}
|
||||
}
|
||||
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
VCFHeader vcfHeader = new VCFHeader(headerLines, vcfSamples);
|
||||
vcfHeader.setWriteEngineHeaders(includeEngineHeaders);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.MendelianViolation;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -337,7 +338,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
|
||||
|
||||
vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
|
||||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
|
||||
|
|
@ -661,7 +662,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate)
|
||||
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||
newGC = VariantContextUtils.stripPLsAndAD(sub.getGenotypes());
|
||||
newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());
|
||||
|
||||
// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
|
||||
if ( vc.getNSamples() != sub.getNSamples() ) {
|
||||
|
|
|
|||
|
|
@ -35,13 +35,13 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -256,7 +256,7 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
|
|||
//if ( popFile != null ) {
|
||||
// throw new StingException("We still need to implement this!");
|
||||
//} else {
|
||||
return VariantContextUtils.computeHardyWeinbergPvalue(vc);
|
||||
return GATKVariantContextUtils.computeHardyWeinbergPvalue(vc);
|
||||
//}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.*;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -180,7 +181,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
|
||||
if ( !genotypeFieldsToTake.isEmpty() ) {
|
||||
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants);
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
samples.addAll(vcfSamples);
|
||||
|
||||
// optimization: if there are no samples, we don't have to worry about any genotype fields
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
|
||||
return 0;
|
||||
|
||||
String rsID = dbsnp == null ? null : VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP);
|
||||
String rsID = dbsnp == null ? null : GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP);
|
||||
|
||||
Collection<VariantContext> contexts = getVariantContexts(tracker, ref);
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.commandline.Input;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.apache.commons.math.MathException;
|
|||
import org.apache.commons.math.distribution.NormalDistribution;
|
||||
import org.apache.commons.math.distribution.NormalDistributionImpl;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ package org.broadinstitute.sting.utils;
|
|||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -29,11 +29,11 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -117,15 +117,15 @@ public class SampleUtils {
|
|||
}
|
||||
|
||||
public static Set<String> getSampleList(Map<String, VCFHeader> headers) {
|
||||
return getSampleList(headers, VariantContextUtils.GenotypeMergeType.PRIORITIZE);
|
||||
return getSampleList(headers, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE);
|
||||
}
|
||||
|
||||
public static Set<String> getSampleList(Map<String, VCFHeader> headers, VariantContextUtils.GenotypeMergeType mergeOption) {
|
||||
public static Set<String> getSampleList(Map<String, VCFHeader> headers, GATKVariantContextUtils.GenotypeMergeType mergeOption) {
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
for ( Map.Entry<String, VCFHeader> val : headers.entrySet() ) {
|
||||
VCFHeader header = val.getValue();
|
||||
for ( String sample : header.getGenotypeSamples() ) {
|
||||
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == VariantContextUtils.GenotypeMergeType.UNIQUIFY));
|
||||
samples.add(GATKVariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import net.sf.samtools.CigarOperator;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.variant.utils;
|
||||
package org.broadinstitute.sting.utils.collections;
|
||||
|
||||
|
||||
public class Pair<X,Y> {
|
||||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.variant.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broad.tribble.readers.PositionalBufferedStream;
|
|||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
|
|
@ -147,4 +147,49 @@ public class GATKVCFUtils {
|
|||
return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary());
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealVariant(List<VariantContext> VCs, VariantContext.Type type) {
|
||||
if ( VCs == null )
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( vc.getType() == type ) {
|
||||
rsID = vc.getID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return rsID;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all of the VCF records from source into memory, returning the header and the VariantContexts
|
||||
*
|
||||
* SHOULD ONLY BE USED FOR UNIT/INTEGRATION TESTING PURPOSES!
|
||||
*
|
||||
* @param source the file to read, must be in VCF4 format
|
||||
* @return
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
|
||||
// read in the features
|
||||
final List<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
|
||||
|
||||
while ( ! pbs.isDone() ) {
|
||||
final VariantContext vc = codec.decode(pbs);
|
||||
if ( vc != null )
|
||||
vcs.add(vc);
|
||||
}
|
||||
|
||||
return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
|
||||
}
|
||||
}
|
||||
|
|
@ -25,12 +25,79 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
public class GATKVariantContextUtils {
|
||||
|
||||
private static Logger logger = Logger.getLogger(GATKVariantContextUtils.class);
|
||||
|
||||
public static final int DEFAULT_PLOIDY = 2;
|
||||
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
||||
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||
public final static String MERGE_INTERSECTION = "Intersection";
|
||||
|
||||
public enum GenotypeMergeType {
|
||||
/**
|
||||
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
|
||||
*/
|
||||
UNIQUIFY,
|
||||
/**
|
||||
* Take genotypes in priority order (see the priority argument).
|
||||
*/
|
||||
PRIORITIZE,
|
||||
/**
|
||||
* Take the genotypes in any order.
|
||||
*/
|
||||
UNSORTED,
|
||||
/**
|
||||
* Require that all samples/genotypes be unique between all inputs.
|
||||
*/
|
||||
REQUIRE_UNIQUE
|
||||
}
|
||||
|
||||
public enum FilteredRecordMergeType {
|
||||
/**
|
||||
* Union - leaves the record if any record is unfiltered.
|
||||
*/
|
||||
KEEP_IF_ANY_UNFILTERED,
|
||||
/**
|
||||
* Requires all records present at site to be unfiltered. VCF files that don't contain the record don't influence this.
|
||||
*/
|
||||
KEEP_IF_ALL_UNFILTERED,
|
||||
/**
|
||||
* If any record is present at this site (regardless of possibly being filtered), then all such records are kept and the filters are reset.
|
||||
*/
|
||||
KEEP_UNCONDITIONAL
|
||||
}
|
||||
|
||||
public enum MultipleAllelesMergeType {
|
||||
/**
|
||||
* Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record.
|
||||
*/
|
||||
BY_TYPE,
|
||||
/**
|
||||
* Merge all allele types at the same start position into the same VCF record.
|
||||
*/
|
||||
MIX_TYPES
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genome location, given a variant context
|
||||
* @param genomeLocParser parser
|
||||
|
|
@ -41,4 +108,885 @@ public class GATKVariantContextUtils {
|
|||
return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true iff VC is an non-complex indel where every allele represents an expansion or
|
||||
* contraction of a series of identical bases in the reference.
|
||||
*
|
||||
* For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT
|
||||
*
|
||||
* If VC = -/CT, then this function returns true because the CT insertion matches exactly the
|
||||
* upcoming reference.
|
||||
* If VC = -/CTA then this function returns false because the CTA isn't a perfect match
|
||||
*
|
||||
* Now consider deletions:
|
||||
*
|
||||
* If VC = CT/- then again the same logic applies and this returns true
|
||||
* The case of CTA/- makes no sense because it doesn't actually match the reference bases.
|
||||
*
|
||||
* The logic of this function is pretty simple. Take all of the non-null alleles in VC. For
|
||||
* each insertion allele of n bases, check if that allele matches the next n reference bases.
|
||||
* For each deletion allele of n bases, check if this matches the reference bases at n - 2 n,
|
||||
* as it must necessarily match the first n bases. If this test returns true for all
|
||||
* alleles you are a tandem repeat, otherwise you are not.
|
||||
*
|
||||
* @param vc
|
||||
* @param refBasesStartingAtVCWithPad not this is assumed to include the PADDED reference
|
||||
* @return
|
||||
*/
|
||||
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
|
||||
public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
|
||||
final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
|
||||
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
||||
return false;
|
||||
|
||||
final Allele ref = vc.getReference();
|
||||
|
||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||
if ( ! isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad) )
|
||||
return false;
|
||||
}
|
||||
|
||||
// we've passed all of the tests, so we are a repeat
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param vc
|
||||
* @param refBasesStartingAtVCWithPad
|
||||
* @return
|
||||
*/
|
||||
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
|
||||
public static Pair<List<Integer>,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
|
||||
final boolean VERBOSE = false;
|
||||
final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
|
||||
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
||||
return null;
|
||||
|
||||
final Allele refAllele = vc.getReference();
|
||||
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
|
||||
|
||||
byte[] repeatUnit = null;
|
||||
final ArrayList<Integer> lengths = new ArrayList<Integer>();
|
||||
|
||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
|
||||
|
||||
final int[] repetitionCount = result.first;
|
||||
// repetition count = 0 means allele is not a tandem expansion of context
|
||||
if (repetitionCount[0] == 0 || repetitionCount[1] == 0)
|
||||
return null;
|
||||
|
||||
if (lengths.size() == 0) {
|
||||
lengths.add(repetitionCount[0]); // add ref allele length only once
|
||||
}
|
||||
lengths.add(repetitionCount[1]); // add this alt allele's length
|
||||
|
||||
repeatUnit = result.second;
|
||||
if (VERBOSE) {
|
||||
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
|
||||
System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
|
||||
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
|
||||
System.out.println("RU:"+new String(repeatUnit));
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<List<Integer>, byte[]>(lengths,repeatUnit);
|
||||
}
|
||||
|
||||
public static Pair<int[],byte[]> getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) {
|
||||
/* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units.
|
||||
Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)2.
|
||||
*/
|
||||
|
||||
byte[] longB;
|
||||
// find first repeat unit based on either ref or alt, whichever is longer
|
||||
if (altBases.length > refBases.length)
|
||||
longB = altBases;
|
||||
else
|
||||
longB = refBases;
|
||||
|
||||
// see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units
|
||||
// for example, -*,CACA needs to first be decomposed into (CA)2
|
||||
final int repeatUnitLength = findRepeatedSubstring(longB);
|
||||
final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength);
|
||||
|
||||
final int[] repetitionCount = new int[2];
|
||||
// look for repetitions forward on the ref bases (i.e. starting at beginning of ref bases)
|
||||
int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases, true);
|
||||
repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext), true)-repetitionsInRef;
|
||||
repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext), true)-repetitionsInRef;
|
||||
|
||||
return new Pair<int[], byte[]>(repetitionCount, repeatUnit);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Find out if a string can be represented as a tandem number of substrings.
|
||||
* For example ACTACT is a 2-tandem of ACT,
|
||||
* but ACTACA is not.
|
||||
*
|
||||
* @param bases String to be tested
|
||||
* @return Length of repeat unit, if string can be represented as tandem of substring (if it can't
|
||||
* be represented as one, it will be just the length of the input string)
|
||||
*/
|
||||
public static int findRepeatedSubstring(byte[] bases) {
|
||||
|
||||
int repLength;
|
||||
for (repLength=1; repLength <=bases.length; repLength++) {
|
||||
final byte[] candidateRepeatUnit = Arrays.copyOf(bases,repLength);
|
||||
boolean allBasesMatch = true;
|
||||
for (int start = repLength; start < bases.length; start += repLength ) {
|
||||
// check that remaining of string is exactly equal to repeat unit
|
||||
final byte[] basePiece = Arrays.copyOfRange(bases,start,start+candidateRepeatUnit.length);
|
||||
if (!Arrays.equals(candidateRepeatUnit, basePiece)) {
|
||||
allBasesMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (allBasesMatch)
|
||||
return repLength;
|
||||
}
|
||||
|
||||
return repLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper routine that finds number of repetitions a string consists of.
|
||||
* For example, for string ATAT and repeat unit AT, number of repetitions = 2
|
||||
* @param repeatUnit Substring
|
||||
* @param testString String to test
|
||||
* @oaram lookForward Look for repetitions forward (at beginning of string) or backward (at end of string)
|
||||
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
|
||||
*/
|
||||
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString, boolean lookForward) {
|
||||
int numRepeats = 0;
|
||||
if (lookForward) {
|
||||
// look forward on the test string
|
||||
for (int start = 0; start < testString.length; start += repeatUnit.length) {
|
||||
int end = start + repeatUnit.length;
|
||||
byte[] unit = Arrays.copyOfRange(testString,start, end);
|
||||
if(Arrays.equals(unit,repeatUnit))
|
||||
numRepeats++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
// look backward. For example, if repeatUnit = AT and testString = GATAT, number of repeat units is still 2
|
||||
// look forward on the test string
|
||||
for (int start = testString.length - repeatUnit.length; start >= 0; start -= repeatUnit.length) {
|
||||
int end = start + repeatUnit.length;
|
||||
byte[] unit = Arrays.copyOfRange(testString,start, end);
|
||||
if(Arrays.equals(unit,repeatUnit))
|
||||
numRepeats++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function for isTandemRepeat that checks that allele matches somewhere on the reference
|
||||
* @param ref
|
||||
* @param alt
|
||||
* @param refBasesStartingAtVCWithoutPad
|
||||
* @return
|
||||
*/
|
||||
protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) {
|
||||
if ( ! Allele.oneIsPrefixOfOther(ref, alt) )
|
||||
return false; // we require one allele be a prefix of another
|
||||
|
||||
if ( ref.length() > alt.length() ) { // we are a deletion
|
||||
return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2);
|
||||
} else { // we are an insertion
|
||||
return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1);
|
||||
}
|
||||
}
|
||||
|
||||
protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) {
|
||||
final String potentialRepeat = l.substring(s.length()); // skip s bases
|
||||
|
||||
for ( int i = 0; i < minNumberOfMatches; i++) {
|
||||
final int start = i * potentialRepeat.length();
|
||||
final int end = (i+1) * potentialRepeat.length();
|
||||
if ( ref.length() < end )
|
||||
return false; // we ran out of bases to test
|
||||
final String refSub = ref.substring(start, end);
|
||||
if ( ! refSub.equals(potentialRepeat) )
|
||||
return false; // repeat didn't match, fail
|
||||
}
|
||||
|
||||
return true; // we passed all tests, we matched
|
||||
}
|
||||
|
||||
/**
|
||||
* subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
|
||||
*
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @param allelesToUse which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
|
||||
* @param assignGenotypes true if we should update the genotypes based on the (subsetted) PLs
|
||||
* @return genotypes
|
||||
*/
|
||||
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
|
||||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes) {
|
||||
|
||||
// the genotypes with PLs
|
||||
final GenotypesContext oldGTs = vc.getGenotypes();
|
||||
|
||||
// samples
|
||||
final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();
|
||||
|
||||
// the new genotypes to create
|
||||
final GenotypesContext newGTs = GenotypesContext.create();
|
||||
|
||||
// we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
|
||||
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
|
||||
final int numNewAltAlleles = allelesToUse.size() - 1;
|
||||
|
||||
// which PLs should be carried forward?
|
||||
ArrayList<Integer> likelihoodIndexesToUse = null;
|
||||
|
||||
// an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
|
||||
// then we can keep the PLs as is; otherwise, we determine which ones to keep
|
||||
if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
|
||||
likelihoodIndexesToUse = new ArrayList<Integer>(30);
|
||||
|
||||
final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
|
||||
for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
|
||||
if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
|
||||
altAlleleIndexToUse[i] = true;
|
||||
}
|
||||
|
||||
// numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
|
||||
final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
|
||||
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
// consider this entry only if both of the alleles are good
|
||||
if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
|
||||
likelihoodIndexesToUse.add(PLindex);
|
||||
}
|
||||
}
|
||||
|
||||
// create the new genotypes
|
||||
for ( int k = 0; k < oldGTs.size(); k++ ) {
|
||||
final Genotype g = oldGTs.get(sampleIndices.get(k));
|
||||
if ( !g.hasLikelihoods() ) {
|
||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
|
||||
continue;
|
||||
}
|
||||
|
||||
// create the new likelihoods array from the alleles we are allowed to use
|
||||
final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
|
||||
double[] newLikelihoods;
|
||||
if ( likelihoodIndexesToUse == null ) {
|
||||
newLikelihoods = originalLikelihoods;
|
||||
} else {
|
||||
newLikelihoods = new double[likelihoodIndexesToUse.size()];
|
||||
int newIndex = 0;
|
||||
for ( int oldIndex : likelihoodIndexesToUse )
|
||||
newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
|
||||
|
||||
// might need to re-normalize
|
||||
newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
|
||||
}
|
||||
|
||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||
if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
|
||||
}
|
||||
else {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||
|
||||
if ( numNewAltAlleles == 0 )
|
||||
gb.noPL();
|
||||
else
|
||||
gb.PL(newLikelihoods);
|
||||
|
||||
// if we weren't asked to assign a genotype, then just no-call the sample
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
|
||||
gb.alleles(NO_CALL_ALLELES);
|
||||
}
|
||||
else {
|
||||
// find the genotype with maximum likelihoods
|
||||
int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
|
||||
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
|
||||
gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
|
||||
if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
|
||||
}
|
||||
newGTs.add(gb.make());
|
||||
}
|
||||
}
|
||||
|
||||
return newGTs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
|
||||
*
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @return genotypes context
|
||||
*/
|
||||
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
|
||||
return subsetDiploidAlleles(vc, vc.getAlleles(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split variant context into its biallelic components if there are more than 2 alleles
|
||||
*
|
||||
* For VC has A/B/C alleles, returns A/B and A/C contexts.
|
||||
* Genotypes are all no-calls now (it's not possible to fix them easily)
|
||||
* Alleles are right trimmed to satisfy VCF conventions
|
||||
*
|
||||
* If vc is biallelic or non-variant it is just returned
|
||||
*
|
||||
* Chromosome counts are updated (but they are by definition 0)
|
||||
*
|
||||
* @param vc a potentially multi-allelic variant context
|
||||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
// non variant or biallelics already satisfy the contract
|
||||
return Collections.singletonList(vc);
|
||||
else {
|
||||
final List<VariantContext> biallelics = new LinkedList<VariantContext>();
|
||||
|
||||
for ( final Allele alt : vc.getAlternateAlleles() ) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(subsetDiploidAlleles(vc, alleles, false));
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
biallelics.add(reverseTrimAlleles(builder.make()));
|
||||
}
|
||||
|
||||
return biallelics;
|
||||
}
|
||||
}
|
||||
|
||||
public static Genotype removePLsAndAD(final Genotype g) {
|
||||
return ( g.hasLikelihoods() || g.hasAD() ) ? new GenotypeBuilder(g).noPL().noAD().make() : g;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||
* If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||
* the sample name
|
||||
*
|
||||
* @param unsortedVCs collection of unsorted VCs
|
||||
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||
* @param filteredRecordMergeType merge type for filtered records
|
||||
* @param genotypeMergeOptions merge option for genotypes
|
||||
* @param annotateOrigin should we annotate the set it came from?
|
||||
* @param printMessages should we print messages?
|
||||
* @param setKey the key name of the set
|
||||
* @param filteredAreUncalled are filtered records uncalled?
|
||||
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||
* @return new VariantContext representing the merge of unsortedVCs
|
||||
*/
|
||||
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
|
||||
final List<String> priorityListOfVCs,
|
||||
final FilteredRecordMergeType filteredRecordMergeType,
|
||||
final GenotypeMergeType genotypeMergeOptions,
|
||||
final boolean annotateOrigin,
|
||||
final boolean printMessages,
|
||||
final String setKey,
|
||||
final boolean filteredAreUncalled,
|
||||
final boolean mergeInfoWithMaxAC ) {
|
||||
int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size();
|
||||
return simpleMerge(unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC);
|
||||
}
|
||||
|
||||
/**
 * Merges VariantContexts into a single hybrid.  Takes genotypes for common samples in priority order, if provided.
 * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
 * the sample name.
 * simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use
 * SampleUtils.verifyUniqueSamplesNames to check that before using simpleMerge.
 *
 * @param unsortedVCs               collection of unsorted VCs
 * @param priorityListOfVCs         priority list detailing the order in which we should grab the VCs
 * @param originalNumOfVCs          number of input VCs before filtering; must equal priorityListOfVCs.size() when a priority list is given
 * @param filteredRecordMergeType   merge type for filtered records
 * @param genotypeMergeOptions      merge option for genotypes
 * @param annotateOrigin            should we annotate the set it came from?
 * @param printMessages             should we print messages?
 * @param setKey                    the key name of the set
 * @param filteredAreUncalled       are filtered records uncalled?
 * @param mergeInfoWithMaxAC        should we merge in info from the VC with maximum allele count?
 * @return new VariantContext representing the merge of unsortedVCs, or null when there is nothing to merge
 */
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
                                         final List<String> priorityListOfVCs,
                                         final int originalNumOfVCs,
                                         final FilteredRecordMergeType filteredRecordMergeType,
                                         final GenotypeMergeType genotypeMergeOptions,
                                         final boolean annotateOrigin,
                                         final boolean printMessages,
                                         final String setKey,
                                         final boolean filteredAreUncalled,
                                         final boolean mergeInfoWithMaxAC ) {

    if ( unsortedVCs == null || unsortedVCs.size() == 0 )
        return null;

    if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size())
        throw new IllegalArgumentException("the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list");

    if ( annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0)
        throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts");

    // order the inputs by priority (when requested), then drop filtered records if they count as uncalled
    final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
    // Make sure all variant contexts are padded with reference base in case of indels if necessary
    final List<VariantContext> VCs = new ArrayList<VariantContext>();

    for (final VariantContext vc : preFilteredVCs) {
        if ( ! filteredAreUncalled || vc.isNotFiltered() )
            VCs.add(vc);
    }
    if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
        return null;

    // establish the baseline info from the first VC
    final VariantContext first = VCs.get(0);
    final String name = first.getSource();
    final Allele refAllele = determineReferenceAllele(VCs);

    // accumulators for the merged record
    final Set<Allele> alleles = new LinkedHashSet<Allele>();
    final Set<String> filters = new HashSet<String>();
    final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
    final Set<String> inconsistentAttributes = new HashSet<String>();
    final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
    final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id

    VariantContext longestVC = first;
    int depth = 0;                    // summed DP across inputs
    int maxAC = -1;                   // largest AC seen, for mergeInfoWithMaxAC
    final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<String, Object>();
    double log10PError = CommonInfo.NO_LOG10_PERROR;
    VariantContext vcWithMaxAC = null;
    GenotypesContext genotypes = GenotypesContext.create();

    // counting the number of filtered and variant VCs
    int nFiltered = 0;

    boolean remapped = false;         // set when any VC's alleles had to be extended to the longest ref

    // cycle through and add info from the other VCs, making sure the loc/reference matches

    for ( final VariantContext vc : VCs ) {
        if ( longestVC.getStart() != vc.getStart() )
            throw new IllegalStateException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());

        if ( VariantContextUtils.getSize(vc) > VariantContextUtils.getSize(longestVC) )
            longestVC = vc; // get the longest location

        nFiltered += vc.isFiltered() ? 1 : 0;
        if ( vc.isVariant() ) variantSources.add(vc.getSource());

        // extend this VC's alleles to the longest reference if necessary
        AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
        remapped = remapped || alleleMapping.needsRemapping();

        alleles.addAll(alleleMapping.values());

        mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);

        // We always take the QUAL of the first VC with a non-MISSING qual for the combined value
        if ( log10PError == CommonInfo.NO_LOG10_PERROR )
            log10PError = vc.getLog10PError();

        filters.addAll(vc.getFilters());

        //
        // add attributes
        //
        // special case DP (add it up) and ID (just preserve it)
        //
        if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
            depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
        if ( vc.hasID() ) rsIDs.add(vc.getID());
        if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
            String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
            // lets see if the string contains a , separator
            if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
                List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
                for (String alleleCount : alleleCountArray) {
                    final int ac = Integer.valueOf(alleleCount.trim());
                    if (ac > maxAC) {
                        maxAC = ac;
                        vcWithMaxAC = vc;
                    }
                }
            } else {
                final int ac = Integer.valueOf(rawAlleleCounts);
                if (ac > maxAC) {
                    maxAC = ac;
                    vcWithMaxAC = vc;
                }
            }
        }

        // merge remaining INFO attributes, discarding any key whose value differs between inputs
        for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
            String key = p.getKey();
            // if we don't like the key already, don't go anywhere
            if ( ! inconsistentAttributes.contains(key) ) {
                final boolean alreadyFound = attributes.containsKey(key);
                final Object boundValue = attributes.get(key);
                final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);

                if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) {
                    // we found the value but we're inconsistent, put it in the exclude list
                    //System.out.printf("Inconsistent INFO values: %s => %s and %s%n", key, boundValue, p.getValue());
                    inconsistentAttributes.add(key);
                    attributes.remove(key);
                } else if ( ! alreadyFound || boundIsMissingValue ) { // no value
                    //if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(), p.getValue());
                    attributes.put(key, p.getValue());
                }
            }
        }
    }

    // if we have more alternate alleles in the merged VC than in one or more of the
    // original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well as allele-dependent attributes like AC,AF, and AD
    for ( final VariantContext vc : VCs ) {
        if (vc.getAlleles().size() == 1)
            continue;
        if ( hasPLIncompatibleAlleles(alleles, vc.getAlleles())) {
            if ( ! genotypes.isEmpty() ) {
                logger.debug(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s",
                        vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.getAlleles()));
            }
            genotypes = stripPLsAndAD(genotypes);
            // this will remove stale AC,AF attributed from vc
            VariantContextUtils.calculateChromosomeCounts(vc, attributes, true);
            break;
        }
    }

    // take the VC with the maxAC and pull the attributes into a modifiable map
    if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
        attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
    }

    // if at least one record was unfiltered and we want a union, clear all of the filters
    if ( (filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL )
        filters.clear();


    if ( annotateOrigin ) { // we care about where the call came from
        String setValue;
        if ( nFiltered == 0 && variantSources.size() == originalNumOfVCs ) // nothing was filtered and every original VC was variant
            setValue = MERGE_INTERSECTION;
        else if ( nFiltered == VCs.size() ) // everything was filtered out
            setValue = MERGE_FILTER_IN_ALL;
        else if ( variantSources.isEmpty() ) // everyone was reference
            setValue = MERGE_REF_IN_ALL;
        else {
            // mixed case: list each variant source, prefixing filtered ones
            final LinkedHashSet<String> s = new LinkedHashSet<String>();
            for ( final VariantContext vc : VCs )
                if ( vc.isVariant() )
                    s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
            setValue = Utils.join("-", s);
        }

        if ( setKey != null ) {
            attributes.put(setKey, setValue);
            if( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
                attributesWithMaxAC.put(setKey, setValue);
            }
        }
    }

    if ( depth > 0 )
        attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

    final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs);

    // assemble the merged record
    final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID);
    builder.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd());
    builder.alleles(alleles);
    builder.genotypes(genotypes);
    builder.log10PError(log10PError);
    builder.filters(filters.isEmpty() ? filters : new TreeSet<String>(filters));
    builder.attributes(new TreeMap<String, Object>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));

    // Trim the padded bases of all alleles if necessary
    final VariantContext merged = builder.make();
    if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
    return merged;
}
|
||||
|
||||
private static final boolean hasPLIncompatibleAlleles(final Collection<Allele> alleleSet1, final Collection<Allele> alleleSet2) {
|
||||
final Iterator<Allele> it1 = alleleSet1.iterator();
|
||||
final Iterator<Allele> it2 = alleleSet2.iterator();
|
||||
|
||||
while ( it1.hasNext() && it2.hasNext() ) {
|
||||
final Allele a1 = it1.next();
|
||||
final Allele a2 = it2.next();
|
||||
if ( ! a1.equals(a2) )
|
||||
return true;
|
||||
}
|
||||
|
||||
// by this point, at least one of the iterators is empty. All of the elements
|
||||
// we've compared are equal up until this point. But it's possible that the
|
||||
// sets aren't the same size, which is indicated by the test below. If they
|
||||
// are of the same size, though, the sets are compatible
|
||||
return it1.hasNext() || it2.hasNext();
|
||||
}
|
||||
|
||||
public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
|
||||
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
||||
|
||||
for ( final Genotype g : genotypes ) {
|
||||
newGs.add(removePLsAndAD(g));
|
||||
}
|
||||
|
||||
return newGs;
|
||||
}
|
||||
|
||||
static private Allele determineReferenceAllele(List<VariantContext> VCs) {
|
||||
Allele ref = null;
|
||||
|
||||
for ( VariantContext vc : VCs ) {
|
||||
Allele myRef = vc.getReference();
|
||||
if ( ref == null || ref.length() < myRef.length() )
|
||||
ref = myRef;
|
||||
else if ( ref.length() == myRef.length() && ! ref.equals(myRef) )
|
||||
throw new TribbleException(String.format("The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s", vc.getChr(), vc.getStart(), ref, myRef));
|
||||
}
|
||||
|
||||
return ref;
|
||||
}
|
||||
|
||||
/**
 * Builds an AlleleMapper translating vc's alleles onto the merged reference allele.
 *
 * When vc already uses refAllele, an identity mapper is returned.  Otherwise each of
 * vc's alleles is extended with the reference bases that refAllele has beyond vc's
 * (shorter) reference, and equal extended alleles are canonicalized to the instances
 * already present in allAlleles.
 *
 * @param refAllele  the longest reference allele across all merged VCs
 * @param vc         the context whose alleles may need extension
 * @param allAlleles alleles already accumulated by the merge, used to canonicalize
 * @return a mapper from vc's alleles to merge-compatible alleles
 */
static private AlleleMapper resolveIncompatibleAlleles(Allele refAllele, VariantContext vc, Set<Allele> allAlleles) {
    if ( refAllele.equals(vc.getReference()) )
        return new AlleleMapper(vc);
    else {
        // we really need to do some work.  The refAllele is the longest reference allele seen at this
        // start site.  So imagine it is:
        //
        // refAllele: ACGTGA
        // myRef:     ACGT
        // myAlt:     A
        //
        // We need to remap all of the alleles in vc to include the extra GA so that
        // myRef => refAllele and myAlt => AGA
        //

        Allele myRef = vc.getReference();
        // refAllele was chosen as the longest reference, so a longer myRef indicates a bug upstream
        if ( refAllele.length() <= myRef.length() ) throw new IllegalStateException("BUG: myRef="+myRef+" is longer than refAllele="+refAllele);
        byte[] extraBases = Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length());

//            System.out.printf("Remapping allele at %s%n", vc);
//            System.out.printf("ref   %s%n", refAllele);
//            System.out.printf("myref %s%n", myRef );
//            System.out.printf("extrabases %s%n", new String(extraBases));

        Map<Allele, Allele> map = new HashMap<Allele, Allele>();
        for ( Allele a : vc.getAlleles() ) {
            if ( a.isReference() )
                map.put(a, refAllele);
            else {
                // extend the alt with the extra reference bases, then reuse an
                // identical allele instance from allAlleles when one exists
                Allele extended = Allele.extend(a, extraBases);
                for ( Allele b : allAlleles )
                    if ( extended.equals(b) )
                        extended = b;
//                    System.out.printf("  Extending %s => %s%n", a, extended);
                map.put(a, extended);
            }
        }

        // debugging
//            System.out.printf("mapping %s%n", map);

        return new AlleleMapper(map);
    }
}
|
||||
|
||||
public static List<VariantContext> sortVariantContextsByPriority(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs, GenotypeMergeType mergeOption ) {
|
||||
if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null )
|
||||
throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");
|
||||
|
||||
if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED )
|
||||
return new ArrayList<VariantContext>(unsortedVCs);
|
||||
else {
|
||||
ArrayList<VariantContext> sorted = new ArrayList<VariantContext>(unsortedVCs);
|
||||
Collections.sort(sorted, new CompareByPriority(priorityListOfVCs));
|
||||
return sorted;
|
||||
}
|
||||
}
|
||||
|
||||
private static void mergeGenotypes(GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) {
|
||||
//TODO: should we add a check for cases when the genotypeMergeOption is REQUIRE_UNIQUE
|
||||
for ( Genotype g : oneVC.getGenotypes() ) {
|
||||
String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
|
||||
if ( ! mergedGenotypes.containsSample(name) ) {
|
||||
// only add if the name is new
|
||||
Genotype newG = g;
|
||||
|
||||
if ( uniqifySamples || alleleMapping.needsRemapping() ) {
|
||||
final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
|
||||
newG = new GenotypeBuilder(g).name(name).alleles(alleles).make();
|
||||
}
|
||||
|
||||
mergedGenotypes.add(newG);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static String mergedSampleName(String trackName, String sampleName, boolean uniqify ) {
|
||||
return uniqify ? sampleName + "." + trackName : sampleName;
|
||||
}
|
||||
|
||||
/**
 * Right-trims the bases common to the ends of all of a context's alleles, producing a
 * new context with shortened alleles, an adjusted stop position, and genotypes rebuilt
 * against the trimmed alleles.  Symbolic alleles are left untouched, and contexts that
 * need no trimming (or have a single allele) are returned unchanged.
 *
 * @param inputVC the context to trim
 * @return the trimmed context, or inputVC itself when no trimming applies
 */
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {

    // see whether we need to trim common reference base from all alleles
    final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
    if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
        return inputVC;

    final List<Allele> alleles = new ArrayList<Allele>();
    final GenotypesContext genotypes = GenotypesContext.create();
    // maps each original allele to its trimmed replacement so genotypes can be rebuilt
    final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

    for (final Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
            // symbolic alleles have no concrete bases to trim; map them to themselves
            alleles.add(a);
            originalToTrimmedAlleleMap.put(a, a);
        } else {
            // get bases for current allele and create a new one with trimmed bases
            final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
            final Allele trimmedAllele = Allele.create(newBases, a.isReference());
            alleles.add(trimmedAllele);
            originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
    }

    // now we can recreate new genotypes with trimmed alleles
    for ( final Genotype genotype : inputVC.getGenotypes() ) {
        final List<Allele> originalAlleles = genotype.getAlleles();
        final List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for ( final Allele a : originalAlleles ) {
            if ( a.isCalled() )
                trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
            else
                trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
    }

    // stop position moves left by the trimmed extent (alleles.get(0) is the trimmed reference)
    return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
}
|
||||
|
||||
/**
 * Computes how many bases can be clipped off the right end of all alleles because they
 * are shared (match the reference from the right), without colliding with the given
 * forward clipping or (unless allowFullClip) emptying any allele.
 *
 * @param unclippedAlleles the alleles under consideration (symbolic alleles are ignored)
 * @param ref              the reference bases to compare the allele tails against
 * @param forwardClipping  bases already clipped from the front; right clipping may not overlap them
 * @param allowFullClip    whether an allele may be clipped down to zero bases
 * @return the number of bases to clip from the right; -1 when the reference is exhausted
 *         and full clipping is not allowed
 */
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
                                         final byte[] ref,
                                         final int forwardClipping,
                                         final boolean allowFullClip) {
    int clipping = 0;
    boolean stillClipping = true;

    while ( stillClipping ) {
        for ( final Allele a : unclippedAlleles ) {
            if ( a.isSymbolic() )
                continue;

            // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
            // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
            if ( a.length() - clipping == 0 )
                return clipping - (allowFullClip ? 0 : 1);

            if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
                // further clipping would overlap the forward-clipped region
                stillClipping = false;
            }
            else if ( ref.length == clipping ) {
                // ran out of reference bases to compare against
                if ( allowFullClip )
                    stillClipping = false;
                else
                    return -1;
            }
            else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
                // this allele's tail no longer matches the reference tail
                stillClipping = false;
            }
        }
        if ( stillClipping )
            clipping++;
    }

    return clipping;
}
|
||||
|
||||
public static double computeHardyWeinbergPvalue(VariantContext vc) {
|
||||
if ( vc.getCalledChrCount() == 0 )
|
||||
return 0.0;
|
||||
return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount());
|
||||
}
|
||||
|
||||
public static boolean requiresPaddingBase(final List<String> alleles) {
|
||||
|
||||
// see whether one of the alleles would be null if trimmed through
|
||||
|
||||
for ( final String allele : alleles ) {
|
||||
if ( allele.isEmpty() )
|
||||
return true;
|
||||
}
|
||||
|
||||
int clipping = 0;
|
||||
Character currentBase = null;
|
||||
|
||||
while ( true ) {
|
||||
for ( final String allele : alleles ) {
|
||||
if ( allele.length() - clipping == 0 )
|
||||
return true;
|
||||
|
||||
char myBase = allele.charAt(clipping);
|
||||
if ( currentBase == null )
|
||||
currentBase = myBase;
|
||||
else if ( currentBase != myBase )
|
||||
return false;
|
||||
}
|
||||
|
||||
clipping++;
|
||||
currentBase = null;
|
||||
}
|
||||
}
|
||||
|
||||
private static class AlleleMapper {
|
||||
private VariantContext vc = null;
|
||||
private Map<Allele, Allele> map = null;
|
||||
public AlleleMapper(VariantContext vc) { this.vc = vc; }
|
||||
public AlleleMapper(Map<Allele, Allele> map) { this.map = map; }
|
||||
public boolean needsRemapping() { return this.map != null; }
|
||||
public Collection<Allele> values() { return map != null ? map.values() : vc.getAlleles(); }
|
||||
public Allele remap(Allele a) { return map != null && map.containsKey(a) ? map.get(a) : a; }
|
||||
|
||||
public List<Allele> remap(List<Allele> as) {
|
||||
List<Allele> newAs = new ArrayList<Allele>();
|
||||
for ( Allele a : as ) {
|
||||
//System.out.printf(" Remapping %s => %s%n", a, remap(a));
|
||||
newAs.add(remap(a));
|
||||
}
|
||||
return newAs;
|
||||
}
|
||||
}
|
||||
|
||||
private static class CompareByPriority implements Comparator<VariantContext>, Serializable {
|
||||
List<String> priorityListOfVCs;
|
||||
public CompareByPriority(List<String> priorityListOfVCs) {
|
||||
this.priorityListOfVCs = priorityListOfVCs;
|
||||
}
|
||||
|
||||
private int getIndex(VariantContext vc) {
|
||||
int i = priorityListOfVCs.indexOf(vc.getSource());
|
||||
if ( i == -1 ) throw new IllegalArgumentException("Priority list " + priorityListOfVCs + " doesn't contain variant context " + vc.getSource());
|
||||
return i;
|
||||
}
|
||||
|
||||
public int compare(VariantContext vc1, VariantContext vc2) {
|
||||
return Integer.valueOf(getIndex(vc1)).compareTo(getIndex(vc2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,8 +26,6 @@
|
|||
package org.broadinstitute.variant.utils;
|
||||
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
|
@ -176,7 +174,7 @@ public class BaseUtils {
|
|||
if ( baseIndex == Base.N.ordinal() ) {
|
||||
bases[i] = 'N';
|
||||
} else if ( errorOnBadReferenceBase && baseIndex == -1 ) {
|
||||
throw new UserException.BadInput("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'");
|
||||
throw new IllegalStateException("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'");
|
||||
}
|
||||
}
|
||||
return bases;
|
||||
|
|
@ -517,7 +515,7 @@ public class BaseUtils {
|
|||
case 'N':
|
||||
return 'N';
|
||||
default:
|
||||
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
|
||||
throw new IllegalArgumentException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -141,13 +141,6 @@ public class GeneralUtils {
|
|||
return normalized;
|
||||
}
|
||||
|
||||
public static double sum(double[] values) {
|
||||
double s = 0.0;
|
||||
for (double v : values)
|
||||
s += v;
|
||||
return s;
|
||||
}
|
||||
|
||||
public static double arrayMax(final double[] array) {
|
||||
return array[maxElementIndex(array, array.length)];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import java.util.*;
|
|||
*
|
||||
* @author depristo
|
||||
*/
|
||||
final class CommonInfo {
|
||||
public final class CommonInfo {
|
||||
public static final double NO_LOG10_PERROR = 1.0;
|
||||
|
||||
private static Set<String> NO_FILTERS = Collections.emptySet();
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -28,15 +28,9 @@ package org.broadinstitute.variant.vcf;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class VCFUtils {
|
||||
|
|
@ -106,21 +100,6 @@ public class VCFUtils {
|
|||
return new HashSet<VCFHeaderLine>(map.values());
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealVariant(List<VariantContext> VCs, VariantContext.Type type) {
|
||||
if ( VCs == null )
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( vc.getType() == type ) {
|
||||
rsID = vc.getID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return rsID;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add / replace the contig header lines in the VCFHeader with the in the reference file and master reference dictionary
|
||||
*
|
||||
|
|
@ -198,35 +177,6 @@ public class VCFUtils {
|
|||
return assembly;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all of the VCF records from source into memory, returning the header and the VariantContexts
|
||||
*
|
||||
* @param source the file to read, must be in VCF4 format
|
||||
* @return
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
|
||||
// read in the features
|
||||
final List<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
|
||||
|
||||
while ( ! pbs.isDone() ) {
|
||||
final VariantContext vc = codec.decode(pbs);
|
||||
if ( vc != null )
|
||||
vcs.add(vc);
|
||||
}
|
||||
|
||||
return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
|
||||
}
|
||||
|
||||
/** Only displays a warning if warnings are enabled and an identical warning hasn't been already issued */
|
||||
private static final class HeaderConflictWarner {
|
||||
boolean emitWarnings;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
|
|||
|
|
@ -36,11 +36,9 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextTestProvider;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -252,13 +250,13 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
|
||||
final File file = new File(path);
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
final Pair<VCFHeader, Iterable<VariantContext>> reader = VariantContextTestProvider.readAllVCs(file, codec);
|
||||
final VariantContextTestProvider.VariantContextContainer reader = VariantContextTestProvider.readAllVCs(file, codec);
|
||||
|
||||
final List<ActiveRegion> incRegions = new ArrayList<ActiveRegion>();
|
||||
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser);
|
||||
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser);
|
||||
int pos = start;
|
||||
for ( final VariantContext vc : reader.getSecond() ) {
|
||||
for ( final VariantContext vc : reader.getVCs() ) {
|
||||
if ( vc == null ) continue;
|
||||
while ( pos < vc.getStart() ) {
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, pos);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
|
|
@ -9,10 +9,10 @@
|
|||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
|
|
@ -23,33 +23,25 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.variant.variantcontext;
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
||||
public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
||||
|
||||
Allele Aref, T, C, G, Cref, ATC, ATCATC;
|
||||
|
||||
@BeforeSuite
|
||||
public void setup() {
|
||||
final File referenceFile = new File(b37KGReference);
|
||||
try {
|
||||
IndexedFastaSequenceFile seq = new IndexedFastaSequenceFile(referenceFile);
|
||||
}
|
||||
catch(FileNotFoundException ex) {
|
||||
throw new RuntimeException(referenceFile.getAbsolutePath(),ex);
|
||||
}
|
||||
|
||||
// alleles
|
||||
Aref = Allele.create("A", true);
|
||||
Cref = Allele.create("C", true);
|
||||
|
|
@ -186,10 +178,10 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
|
||||
final List<String> priority = vcs2priority(inputs);
|
||||
|
||||
final VariantContext merged = VariantContextUtils.simpleMerge(
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
inputs, priority,
|
||||
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false);
|
||||
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false);
|
||||
|
||||
Assert.assertEquals(merged.getAlleles(), cfg.expected);
|
||||
}
|
||||
|
|
@ -244,10 +236,10 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
inputs.add(new VariantContextBuilder(snpVC1).id(id).make());
|
||||
}
|
||||
|
||||
final VariantContext merged = VariantContextUtils.simpleMerge(
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
inputs, null,
|
||||
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false);
|
||||
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false);
|
||||
Assert.assertEquals(merged.getID(), cfg.expected);
|
||||
}
|
||||
|
||||
|
|
@ -261,14 +253,14 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
List<VariantContext> inputs;
|
||||
VariantContext expected;
|
||||
String setExpected;
|
||||
VariantContextUtils.FilteredRecordMergeType type;
|
||||
GATKVariantContextUtils.FilteredRecordMergeType type;
|
||||
|
||||
|
||||
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) {
|
||||
this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected);
|
||||
this(name, input1, input2, expected, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected);
|
||||
}
|
||||
|
||||
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) {
|
||||
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, GATKVariantContextUtils.FilteredRecordMergeType type, String setExpected) {
|
||||
super(MergeFilteredTest.class, name);
|
||||
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(input1, input2));
|
||||
this.expected = expected;
|
||||
|
|
@ -288,66 +280,66 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||
VariantContextUtils.MERGE_INTERSECTION);
|
||||
GATKVariantContextUtils.MERGE_INTERSECTION);
|
||||
|
||||
new MergeFilteredTest("noFilters",
|
||||
makeVC("1", Arrays.asList(Aref, T), "."),
|
||||
makeVC("2", Arrays.asList(Aref, T), "."),
|
||||
makeVC("3", Arrays.asList(Aref, T), "."),
|
||||
VariantContextUtils.MERGE_INTERSECTION);
|
||||
GATKVariantContextUtils.MERGE_INTERSECTION);
|
||||
|
||||
new MergeFilteredTest("oneFiltered",
|
||||
makeVC("1", Arrays.asList(Aref, T), "."),
|
||||
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
|
||||
makeVC("3", Arrays.asList(Aref, T), "."),
|
||||
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
String.format("1-%s2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
|
||||
new MergeFilteredTest("onePassOneFail",
|
||||
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
|
||||
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
String.format("1-%s2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
|
||||
new MergeFilteredTest("AllFiltered",
|
||||
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
|
||||
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
|
||||
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
|
||||
VariantContextUtils.MERGE_FILTER_IN_ALL);
|
||||
GATKVariantContextUtils.MERGE_FILTER_IN_ALL);
|
||||
|
||||
// test ALL vs. ANY
|
||||
new MergeFilteredTest("FailOneUnfiltered",
|
||||
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
|
||||
makeVC("2", Arrays.asList(Aref, T), "."),
|
||||
makeVC("3", Arrays.asList(Aref, T), "."),
|
||||
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
String.format("%s1-2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
|
||||
new MergeFilteredTest("OneFailAllUnfilteredArg",
|
||||
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
|
||||
makeVC("2", Arrays.asList(Aref, T), "."),
|
||||
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
|
||||
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED,
|
||||
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED,
|
||||
String.format("%s1-2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
|
||||
// test excluding allele in filtered record
|
||||
new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords",
|
||||
makeVC("1", Arrays.asList(Aref, T), "."),
|
||||
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
|
||||
makeVC("3", Arrays.asList(Aref, T), "."),
|
||||
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
String.format("1-%s2", GATKVariantContextUtils.MERGE_FILTER_PREFIX));
|
||||
|
||||
// promotion of site from unfiltered to PASSES
|
||||
new MergeFilteredTest("UnfilteredPlusPassIsPass",
|
||||
makeVC("1", Arrays.asList(Aref, T), "."),
|
||||
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||
VariantContextUtils.MERGE_INTERSECTION);
|
||||
GATKVariantContextUtils.MERGE_INTERSECTION);
|
||||
|
||||
new MergeFilteredTest("RefInAll",
|
||||
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
|
||||
makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
|
||||
makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
|
||||
VariantContextUtils.MERGE_REF_IN_ALL);
|
||||
GATKVariantContextUtils.MERGE_REF_IN_ALL);
|
||||
|
||||
new MergeFilteredTest("RefInOne",
|
||||
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
|
||||
|
|
@ -361,8 +353,8 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
@Test(dataProvider = "mergeFiltered")
|
||||
public void testMergeFiltered(MergeFilteredTest cfg) {
|
||||
final List<String> priority = vcs2priority(cfg.inputs);
|
||||
final VariantContext merged = VariantContextUtils.simpleMerge(
|
||||
cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
cfg.inputs, priority, cfg.type, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||
|
||||
// test alleles are equal
|
||||
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
|
||||
|
|
@ -487,9 +479,9 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
|
||||
@Test(dataProvider = "mergeGenotypes")
|
||||
public void testMergeGenotypes(MergeGenotypesTest cfg) {
|
||||
final VariantContext merged = VariantContextUtils.simpleMerge(
|
||||
cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
cfg.inputs, cfg.priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||
|
||||
// test alleles are equal
|
||||
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
|
||||
|
|
@ -528,9 +520,9 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, -1));
|
||||
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, -2));
|
||||
|
||||
final VariantContext merged = VariantContextUtils.simpleMerge(
|
||||
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
Arrays.asList(vc1, vc2), null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
|
||||
|
||||
// test genotypes
|
||||
Assert.assertEquals(merged.getSampleNames(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
|
||||
|
|
@ -561,12 +553,12 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
|
||||
VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
|
||||
|
||||
final VariantContext merged = VariantContextUtils.simpleMerge(
|
||||
Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false);
|
||||
final VariantContext merged = GATKVariantContextUtils.simpleMerge(
|
||||
Arrays.asList(vc1, vc2), priority, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false);
|
||||
|
||||
if ( annotate )
|
||||
Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION);
|
||||
Assert.assertEquals(merged.getAttribute(set), GATKVariantContextUtils.MERGE_INTERSECTION);
|
||||
else
|
||||
Assert.assertFalse(merged.hasAttribute(set));
|
||||
}
|
||||
|
|
@ -583,78 +575,6 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
return priority;
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Test repeats
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class RepeatDetectorTest extends TestDataProvider {
|
||||
String ref;
|
||||
boolean isTrueRepeat;
|
||||
VariantContext vc;
|
||||
|
||||
private RepeatDetectorTest(boolean isTrueRepeat, String ref, String refAlleleString, String ... altAlleleStrings) {
|
||||
super(RepeatDetectorTest.class);
|
||||
this.isTrueRepeat = isTrueRepeat;
|
||||
this.ref = ref;
|
||||
|
||||
List<Allele> alleles = new LinkedList<Allele>();
|
||||
final Allele refAllele = Allele.create(refAlleleString, true);
|
||||
alleles.add(refAllele);
|
||||
for ( final String altString: altAlleleStrings) {
|
||||
final Allele alt = Allele.create(altString, false);
|
||||
alleles.add(alt);
|
||||
}
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, refAllele.length(), alleles);
|
||||
this.vc = builder.make();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s refBases=%s trueRepeat=%b vc=%s", super.toString(), ref, isTrueRepeat, vc);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "RepeatDetectorTest")
|
||||
public Object[][] makeRepeatDetectorTest() {
|
||||
new RepeatDetectorTest(true, "NAAC", "N", "NA");
|
||||
new RepeatDetectorTest(true, "NAAC", "NA", "N");
|
||||
new RepeatDetectorTest(false, "NAAC", "NAA", "N");
|
||||
new RepeatDetectorTest(false, "NAAC", "N", "NC");
|
||||
new RepeatDetectorTest(false, "AAC", "A", "C");
|
||||
|
||||
// running out of ref bases => false
|
||||
new RepeatDetectorTest(false, "NAAC", "N", "NCAGTA");
|
||||
|
||||
// complex repeats
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NAT");
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NATA");
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NATAT");
|
||||
new RepeatDetectorTest(true, "NATATATC", "NAT", "N");
|
||||
new RepeatDetectorTest(false, "NATATATC", "NATA", "N");
|
||||
new RepeatDetectorTest(false, "NATATATC", "NATAT", "N");
|
||||
|
||||
// multi-allelic
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATAT");
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATA");
|
||||
new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATAT");
|
||||
new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATA"); // two As
|
||||
new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NATC"); // false
|
||||
new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NCC"); // false
|
||||
new RepeatDetectorTest(false, "NATATATC", "NAT", "NATAT", "NCC"); // false
|
||||
|
||||
return RepeatDetectorTest.getTests(RepeatDetectorTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "RepeatDetectorTest")
|
||||
public void testRepeatDetectorTest(RepeatDetectorTest cfg) {
|
||||
|
||||
// test alleles are equal
|
||||
Assert.assertEquals(VariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// basic allele clipping test
|
||||
|
|
@ -698,10 +618,11 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
|
||||
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||
int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
int result = GATKVariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
Assert.assertEquals(result, cfg.expectedClip);
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// test splitting into bi-allelics
|
||||
|
|
@ -776,7 +697,7 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
|
||||
@Test(dataProvider = "SplitBiallelics")
|
||||
public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
|
||||
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc);
|
||||
final List<VariantContext> biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vc);
|
||||
Assert.assertEquals(biallelics.size(), expectedBiallelics.size());
|
||||
for ( int i = 0; i < biallelics.size(); i++ ) {
|
||||
final VariantContext actual = biallelics.get(i);
|
||||
|
|
@ -790,14 +711,14 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
final List<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
|
||||
int sampleI = 0;
|
||||
for ( final List<Allele> alleles : GeneralUtils.makePermutations(vc.getAlleles(), 2, true) ) {
|
||||
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
|
||||
genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
|
||||
}
|
||||
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
|
||||
|
||||
final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make();
|
||||
|
||||
final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
|
||||
final List<VariantContext> biallelics = GATKVariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
|
||||
for ( int i = 0; i < biallelics.size(); i++ ) {
|
||||
final VariantContext actual = biallelics.get(i);
|
||||
Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples
|
||||
|
|
@ -812,4 +733,159 @@ public class VariantContextUtilsUnitTest extends VariantBaseTest {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Test repeats
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class RepeatDetectorTest extends TestDataProvider {
|
||||
String ref;
|
||||
boolean isTrueRepeat;
|
||||
VariantContext vc;
|
||||
|
||||
private RepeatDetectorTest(boolean isTrueRepeat, String ref, String refAlleleString, String ... altAlleleStrings) {
|
||||
super(RepeatDetectorTest.class);
|
||||
this.isTrueRepeat = isTrueRepeat;
|
||||
this.ref = ref;
|
||||
|
||||
List<Allele> alleles = new LinkedList<Allele>();
|
||||
final Allele refAllele = Allele.create(refAlleleString, true);
|
||||
alleles.add(refAllele);
|
||||
for ( final String altString: altAlleleStrings) {
|
||||
final Allele alt = Allele.create(altString, false);
|
||||
alleles.add(alt);
|
||||
}
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, refAllele.length(), alleles);
|
||||
this.vc = builder.make();
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s refBases=%s trueRepeat=%b vc=%s", super.toString(), ref, isTrueRepeat, vc);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "RepeatDetectorTest")
|
||||
public Object[][] makeRepeatDetectorTest() {
|
||||
new RepeatDetectorTest(true, "NAAC", "N", "NA");
|
||||
new RepeatDetectorTest(true, "NAAC", "NA", "N");
|
||||
new RepeatDetectorTest(false, "NAAC", "NAA", "N");
|
||||
new RepeatDetectorTest(false, "NAAC", "N", "NC");
|
||||
new RepeatDetectorTest(false, "AAC", "A", "C");
|
||||
|
||||
// running out of ref bases => false
|
||||
new RepeatDetectorTest(false, "NAAC", "N", "NCAGTA");
|
||||
|
||||
// complex repeats
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NAT");
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NATA");
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NATAT");
|
||||
new RepeatDetectorTest(true, "NATATATC", "NAT", "N");
|
||||
new RepeatDetectorTest(false, "NATATATC", "NATA", "N");
|
||||
new RepeatDetectorTest(false, "NATATATC", "NATAT", "N");
|
||||
|
||||
// multi-allelic
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATAT");
|
||||
new RepeatDetectorTest(true, "NATATATC", "N", "NAT", "NATA");
|
||||
new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATAT");
|
||||
new RepeatDetectorTest(true, "NATATATC", "NAT", "N", "NATA"); // two As
|
||||
new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NATC"); // false
|
||||
new RepeatDetectorTest(false, "NATATATC", "NAT", "N", "NCC"); // false
|
||||
new RepeatDetectorTest(false, "NATATATC", "NAT", "NATAT", "NCC"); // false
|
||||
|
||||
return RepeatDetectorTest.getTests(RepeatDetectorTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "RepeatDetectorTest")
|
||||
public void testRepeatDetectorTest(RepeatDetectorTest cfg) {
|
||||
|
||||
// test alleles are equal
|
||||
Assert.assertEquals(GATKVariantContextUtils.isTandemRepeat(cfg.vc, cfg.ref.getBytes()), cfg.isTrueRepeat);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRepeatAllele() {
|
||||
Allele nullR = Allele.create("A", true);
|
||||
Allele nullA = Allele.create("A", false);
|
||||
Allele atc = Allele.create("AATC", false);
|
||||
Allele atcatc = Allele.create("AATCATC", false);
|
||||
Allele ccccR = Allele.create("ACCCC", true);
|
||||
Allele cc = Allele.create("ACC", false);
|
||||
Allele cccccc = Allele.create("ACCCCCC", false);
|
||||
Allele gagaR = Allele.create("AGAGA", true);
|
||||
Allele gagagaga = Allele.create("AGAGAGAGA", false);
|
||||
|
||||
// - / ATC [ref] from 20-22
|
||||
String delLoc = "chr1";
|
||||
int delLocStart = 20;
|
||||
int delLocStop = 22;
|
||||
|
||||
// - [ref] / ATC from 20-20
|
||||
String insLoc = "chr1";
|
||||
int insLocStart = 20;
|
||||
int insLocStop = 20;
|
||||
|
||||
Pair<List<Integer>,byte[]> result;
|
||||
byte[] refBytes = "TATCATCATCGGA".getBytes();
|
||||
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes(), true),4);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes(), true),0);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes(), true),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes(), true),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes(), true),2);
|
||||
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4);
|
||||
Assert.assertEquals(GATKVariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
|
||||
|
||||
|
||||
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
|
||||
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],4);
|
||||
Assert.assertEquals(result.getSecond().length,3);
|
||||
|
||||
// ATC*,A,ATCATC
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],2);
|
||||
Assert.assertEquals(result.getFirst().toArray()[2],4);
|
||||
Assert.assertEquals(result.getSecond().length,3);
|
||||
|
||||
// simple non-tandem deletion: CCCC*, -
|
||||
refBytes = "TCCCCCCCCATG".getBytes();
|
||||
vc = new VariantContextBuilder("foo", delLoc, 10, 14, Arrays.asList(ccccR,nullA)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],8);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],4);
|
||||
Assert.assertEquals(result.getSecond().length,1);
|
||||
|
||||
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
|
||||
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],7);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],5);
|
||||
Assert.assertEquals(result.getFirst().toArray()[2],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[3],9);
|
||||
Assert.assertEquals(result.getSecond().length,1);
|
||||
|
||||
// GAGA*,-,GAGAGAGA
|
||||
refBytes = "TGAGAGAGAGATTT".getBytes();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
|
||||
result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],5);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[2],7);
|
||||
Assert.assertEquals(result.getSecond().length,2);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -231,9 +231,9 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
toMerge.add(new VariantContextBuilder(vc).genotypes(gc).make());
|
||||
}
|
||||
|
||||
VariantContextUtils.simpleMerge(toMerge, null,
|
||||
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
VariantContextUtils.GenotypeMergeType.UNSORTED,
|
||||
GATKVariantContextUtils.simpleMerge(toMerge, null,
|
||||
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
GATKVariantContextUtils.GenotypeMergeType.UNSORTED,
|
||||
true, false, "set", false, true);
|
||||
}
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,3 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.variant;
|
||||
|
||||
import org.testng.Assert;
|
||||
|
|
|
|||
|
|
@ -32,7 +32,6 @@ import org.broadinstitute.variant.VariantBaseTest;
|
|||
import org.broadinstitute.variant.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.writer.Options;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -74,6 +73,24 @@ public class VariantContextTestProvider {
|
|||
}
|
||||
}
|
||||
|
||||
public static class VariantContextContainer {
|
||||
private VCFHeader header;
|
||||
private Iterable<VariantContext> vcs;
|
||||
|
||||
public VariantContextContainer( VCFHeader header, Iterable<VariantContext> vcs ) {
|
||||
this.header = header;
|
||||
this.vcs = vcs;
|
||||
}
|
||||
|
||||
public VCFHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
public Iterable<VariantContext> getVCs() {
|
||||
return vcs;
|
||||
}
|
||||
}
|
||||
|
||||
public abstract static class VariantContextIOTest {
|
||||
public String toString() {
|
||||
return "VariantContextIOTest:" + getExtension();
|
||||
|
|
@ -150,15 +167,15 @@ public class VariantContextTestProvider {
|
|||
if ( ENABLE_SOURCE_VCF_TESTS ) {
|
||||
for ( final File file : testSourceVCFs ) {
|
||||
VCFCodec codec = new VCFCodec();
|
||||
Pair<VCFHeader, Iterable<VariantContext>> x = readAllVCs( file, codec );
|
||||
VariantContextContainer x = readAllVCs( file, codec );
|
||||
List<VariantContext> fullyDecoded = new ArrayList<VariantContext>();
|
||||
|
||||
for ( final VariantContext raw : x.getSecond() ) {
|
||||
for ( final VariantContext raw : x.getVCs() ) {
|
||||
if ( raw != null )
|
||||
fullyDecoded.add(raw.fullyDecode(x.getFirst(), false));
|
||||
fullyDecoded.add(raw.fullyDecode(x.getHeader(), false));
|
||||
}
|
||||
|
||||
TEST_DATAs.add(new VariantContextTestData(x.getFirst(), fullyDecoded));
|
||||
TEST_DATAs.add(new VariantContextTestData(x.getHeader(), fullyDecoded));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -616,8 +633,8 @@ public class VariantContextTestProvider {
|
|||
writeVCsToFile(writer, header, data.vcs);
|
||||
|
||||
// ensure writing of expected == actual
|
||||
final Pair<VCFHeader, Iterable<VariantContext>> p = readAllVCs(tmpFile, tester.makeCodec());
|
||||
final Iterable<VariantContext> actual = p.getSecond();
|
||||
final VariantContextContainer p = readAllVCs(tmpFile, tester.makeCodec());
|
||||
final Iterable<VariantContext> actual = p.getVCs();
|
||||
|
||||
int i = 0;
|
||||
for ( final VariantContext readVC : actual ) {
|
||||
|
|
@ -655,14 +672,14 @@ public class VariantContextTestProvider {
|
|||
writeVCsToFile(writer, header, vcs);
|
||||
|
||||
// ensure writing of expected == actual
|
||||
final Pair<VCFHeader, Iterable<VariantContext>> p = readAllVCs(tmpFile, tester.makeCodec());
|
||||
final Iterable<VariantContext> actual = p.getSecond();
|
||||
final VariantContextContainer p = readAllVCs(tmpFile, tester.makeCodec());
|
||||
final Iterable<VariantContext> actual = p.getVCs();
|
||||
assertEquals(actual, expected);
|
||||
|
||||
if ( recurse ) {
|
||||
// if we are doing a recursive test, grab a fresh iterator over the written values
|
||||
final Iterable<VariantContext> read = readAllVCs(tmpFile, tester.makeCodec()).getSecond();
|
||||
testReaderWriter(tester, p.getFirst(), expected, read, false);
|
||||
final Iterable<VariantContext> read = readAllVCs(tmpFile, tester.makeCodec()).getVCs();
|
||||
testReaderWriter(tester, p.getHeader(), expected, read, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -683,7 +700,7 @@ public class VariantContextTestProvider {
|
|||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public final static Pair<VCFHeader, Iterable<VariantContext>> readAllVCs( final File source, final FeatureCodec<VariantContext> codec ) throws IOException {
|
||||
public final static VariantContextContainer readAllVCs( final File source, final FeatureCodec<VariantContext> codec ) throws IOException {
|
||||
// read in the features
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
|
|
@ -693,7 +710,7 @@ public class VariantContextTestProvider {
|
|||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
|
||||
return new Pair<VCFHeader, Iterable<VariantContext>>(vcfHeader, new VCIterable(pbs, codec, vcfHeader));
|
||||
return new VariantContextContainer(vcfHeader, new VCIterable(pbs, codec, vcfHeader));
|
||||
}
|
||||
|
||||
public static class VCIterable implements Iterable<VariantContext>, Iterator<VariantContext> {
|
||||
|
|
@ -738,10 +755,10 @@ public class VariantContextTestProvider {
|
|||
}
|
||||
|
||||
public static void assertVCFandBCFFilesAreTheSame(final File vcfFile, final File bcfFile) throws IOException {
|
||||
final Pair<VCFHeader, Iterable<VariantContext>> vcfData = readAllVCs(vcfFile, new VCFCodec());
|
||||
final Pair<VCFHeader, Iterable<VariantContext>> bcfData = readAllVCs(bcfFile, new BCF2Codec());
|
||||
assertEquals(bcfData.getFirst(), vcfData.getFirst());
|
||||
assertEquals(bcfData.getSecond(), vcfData.getSecond());
|
||||
final VariantContextContainer vcfData = readAllVCs(vcfFile, new VCFCodec());
|
||||
final VariantContextContainer bcfData = readAllVCs(bcfFile, new BCF2Codec());
|
||||
assertEquals(bcfData.getHeader(), vcfData.getHeader());
|
||||
assertEquals(bcfData.getVCs(), vcfData.getVCs());
|
||||
}
|
||||
|
||||
public static void assertEquals(final Iterable<VariantContext> actual, final Iterable<VariantContext> expected) {
|
||||
|
|
|
|||
|
|
@ -28,9 +28,7 @@ package org.broadinstitute.variant.variantcontext;
|
|||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.variant.VariantBaseTest;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -484,78 +482,6 @@ public class VariantContextUnitTest extends VariantBaseTest {
|
|||
Assert.assertNotNull(vc.getFiltersMaybeNull());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRepeatAllele() {
|
||||
Allele nullR = Allele.create("A", true);
|
||||
Allele nullA = Allele.create("A", false);
|
||||
Allele atc = Allele.create("AATC", false);
|
||||
Allele atcatc = Allele.create("AATCATC", false);
|
||||
Allele ccccR = Allele.create("ACCCC", true);
|
||||
Allele cc = Allele.create("ACC", false);
|
||||
Allele cccccc = Allele.create("ACCCCCC", false);
|
||||
Allele gagaR = Allele.create("AGAGA", true);
|
||||
Allele gagagaga = Allele.create("AGAGAGAGA", false);
|
||||
|
||||
Pair<List<Integer>,byte[]> result;
|
||||
byte[] refBytes = "TATCATCATCGGA".getBytes();
|
||||
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("ATG".getBytes(), "ATGATGATGATG".getBytes(), true),4);
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("G".getBytes(), "ATGATGATGATG".getBytes(), true),0);
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("T".getBytes(), "T".getBytes(), true),1);
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("AT".getBytes(), "ATGATGATCATG".getBytes(), true),1);
|
||||
Assert.assertEquals(VariantContextUtils.findNumberofRepetitions("CCC".getBytes(), "CCCCCCCC".getBytes(), true),2);
|
||||
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("ATG".getBytes()),3);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AAA".getBytes()),1);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACAC".getBytes()),7);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CACACA".getBytes()),2);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("CATGCATG".getBytes()),4);
|
||||
Assert.assertEquals(VariantContextUtils.findRepeatedSubstring("AATAATA".getBytes()),7);
|
||||
|
||||
|
||||
// A*,ATC, context = ATC ATC ATC : (ATC)3 -> (ATC)4
|
||||
VariantContext vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStop, Arrays.asList(nullR,atc)).make();
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],4);
|
||||
Assert.assertEquals(result.getSecond().length,3);
|
||||
|
||||
// ATC*,A,ATCATC
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+3, Arrays.asList(Allele.create("AATC", true),nullA,atcatc)).make();
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],2);
|
||||
Assert.assertEquals(result.getFirst().toArray()[2],4);
|
||||
Assert.assertEquals(result.getSecond().length,3);
|
||||
|
||||
// simple non-tandem deletion: CCCC*, -
|
||||
refBytes = "TCCCCCCCCATG".getBytes();
|
||||
vc = new VariantContextBuilder("foo", delLoc, 10, 14, Arrays.asList(ccccR,nullA)).make();
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],8);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],4);
|
||||
Assert.assertEquals(result.getSecond().length,1);
|
||||
|
||||
// CCCC*,CC,-,CCCCCC, context = CCC: (C)7 -> (C)5,(C)3,(C)9
|
||||
refBytes = "TCCCCCCCAGAGAGAG".getBytes();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(ccccR,cc, nullA,cccccc)).make();
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],7);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],5);
|
||||
Assert.assertEquals(result.getFirst().toArray()[2],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[3],9);
|
||||
Assert.assertEquals(result.getSecond().length,1);
|
||||
|
||||
// GAGA*,-,GAGAGAGA
|
||||
refBytes = "TGAGAGAGAGATTT".getBytes();
|
||||
vc = new VariantContextBuilder("foo", insLoc, insLocStart, insLocStart+4, Arrays.asList(gagaR, nullA,gagagaga)).make();
|
||||
result = VariantContextUtils.getNumTandemRepeatUnits(vc, refBytes);
|
||||
Assert.assertEquals(result.getFirst().toArray()[0],5);
|
||||
Assert.assertEquals(result.getFirst().toArray()[1],3);
|
||||
Assert.assertEquals(result.getFirst().toArray()[2],7);
|
||||
Assert.assertEquals(result.getSecond().length,2);
|
||||
|
||||
}
|
||||
@Test
|
||||
public void testGetGenotypeCounts() {
|
||||
List<Allele> alleles = Arrays.asList(Aref, T);
|
||||
|
|
|
|||
Loading…
Reference in New Issue