Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
e7d7d70247
|
|
@ -57,7 +57,7 @@ import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
|||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
|
|
|||
|
|
@ -53,8 +53,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -79,7 +79,7 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa
|
|||
if ( !vc.isIndel())
|
||||
return null;
|
||||
|
||||
Pair<List<Integer>,byte[]> result = VariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
|
||||
Pair<List<Integer>,byte[]> result = GATKVariantContextUtils.getNumTandemRepeatUnits(vc, ref.getForwardBases());
|
||||
if (result == null)
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
|
|||
|
|
@ -244,7 +244,7 @@ public class HeaderElement {
|
|||
*
|
||||
* @return whether or not the HeaderElement is variant due to excess insertions
|
||||
*/
|
||||
private boolean isVariantFromMismatches(double minVariantProportion) {
|
||||
protected boolean isVariantFromMismatches(double minVariantProportion) {
|
||||
BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels();
|
||||
double mostCommonProportion = consensusBaseCounts.baseCountProportionWithoutIndels(mostCommon);
|
||||
return mostCommonProportion != 0.0 && mostCommonProportion < (1 - minVariantProportion);
|
||||
|
|
@ -256,11 +256,11 @@ public class HeaderElement {
|
|||
*
|
||||
* @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
|
||||
*/
|
||||
private boolean isVariantFromSoftClips() {
|
||||
return nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
|
||||
protected boolean isVariantFromSoftClips() {
|
||||
return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
|
||||
}
|
||||
|
||||
private boolean basePassesFilters(byte baseQual, int minBaseQual, int baseMappingQuality, int minMappingQual) {
|
||||
protected boolean basePassesFilters(byte baseQual, int minBaseQual, int baseMappingQuality, int minMappingQual) {
|
||||
return baseQual >= minBaseQual && baseMappingQuality >= minMappingQual;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
|
|||
|
|
@ -78,6 +78,13 @@ public class SimpleGenomeLoc extends GenomeLoc {
|
|||
return finished;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges 2 *contiguous* locs into 1
|
||||
*
|
||||
* @param a SimpleGenomeLoc #1
|
||||
* @param b SimpleGenomeLoc #2
|
||||
* @return one merged loc
|
||||
*/
|
||||
@Requires("a != null && b != null")
|
||||
public static SimpleGenomeLoc merge(SimpleGenomeLoc a, SimpleGenomeLoc b) throws ReviewedStingException {
|
||||
if(GenomeLoc.isUnmapped(a) || GenomeLoc.isUnmapped(b)) {
|
||||
|
|
@ -88,7 +95,6 @@ public class SimpleGenomeLoc extends GenomeLoc {
|
|||
throw new ReviewedStingException("The two genome locs need to be contiguous");
|
||||
}
|
||||
|
||||
|
||||
return new SimpleGenomeLoc(a.getContig(), a.contigIndex,
|
||||
Math.min(a.getStart(), b.getStart()),
|
||||
Math.max(a.getStop(), b.getStop()),
|
||||
|
|
@ -101,19 +107,22 @@ public class SimpleGenomeLoc extends GenomeLoc {
|
|||
* @param sortedLocs a sorted list of contiguous locs
|
||||
* @return one merged loc
|
||||
*/
|
||||
@Requires("sortedLocs != null")
|
||||
public static SimpleGenomeLoc merge(SortedSet<SimpleGenomeLoc> sortedLocs) {
|
||||
SimpleGenomeLoc previousLoc = null;
|
||||
for (SimpleGenomeLoc loc : sortedLocs) {
|
||||
if (loc.isUnmapped()) {
|
||||
SimpleGenomeLoc result = null;
|
||||
|
||||
for ( SimpleGenomeLoc loc : sortedLocs ) {
|
||||
if ( loc.isUnmapped() )
|
||||
throw new ReviewedStingException("Tried to merge unmapped genome locs");
|
||||
}
|
||||
if (previousLoc != null && !previousLoc.contiguousP(loc)) {
|
||||
|
||||
if ( result == null )
|
||||
result = loc;
|
||||
else if ( !result.contiguousP(loc) )
|
||||
throw new ReviewedStingException("The genome locs need to be contiguous");
|
||||
}
|
||||
previousLoc = loc;
|
||||
else
|
||||
result = merge(result, loc);
|
||||
}
|
||||
SimpleGenomeLoc firstLoc = sortedLocs.first();
|
||||
SimpleGenomeLoc lastLoc = sortedLocs.last();
|
||||
return merge(firstLoc, lastLoc);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -46,13 +46,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
|
||||
|
|
@ -135,6 +136,15 @@ public class SlidingWindow {
|
|||
return header.isEmpty() ? -1 : header.peek().getLocation();
|
||||
}
|
||||
|
||||
// for testing only
|
||||
protected SlidingWindow(final String contig, final int contigIndex, final int startLocation) {
|
||||
this.contig = contig;
|
||||
this.contigIndex = contigIndex;
|
||||
nContigs = 1;
|
||||
this.windowHeader = new LinkedList<HeaderElement>();
|
||||
windowHeader.addFirst(new HeaderElement(startLocation));
|
||||
this.readsInWindow = new TreeSet<GATKSAMRecord>();
|
||||
}
|
||||
|
||||
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction) {
|
||||
this.contextSize = contextSize;
|
||||
|
|
@ -193,14 +203,17 @@ public class SlidingWindow {
|
|||
}
|
||||
|
||||
/**
|
||||
* returns the next complete or incomplete variant region between 'from' (inclusive) and 'to' (exclusive)
|
||||
* Returns the next complete (or incomplete if closeLastRegion is true) variant region between 'from' (inclusive) and 'to' (exclusive)
|
||||
* but converted to global coordinates.
|
||||
*
|
||||
* @param from beginning window header index of the search window (inclusive)
|
||||
* @param to end window header index of the search window (exclusive)
|
||||
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates
|
||||
* @param to end window header index of the search window (exclusive); note that this uses local coordinates
|
||||
* @param variantSite boolean array with true marking variant regions
|
||||
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region.
|
||||
* @param closeLastRegion if the last index is variant (so it's an incomplete region), should we close (and return as an interval) the location or ignore it?
|
||||
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region. All coordinates returned are global.
|
||||
*/
|
||||
private SimpleGenomeLoc findNextVariantRegion(int from, int to, boolean[] variantSite, boolean forceClose) {
|
||||
@Requires({"from >= 0", "from <= to", "to <= variantSite.length"})
|
||||
private SimpleGenomeLoc findNextVariantRegion(int from, int to, boolean[] variantSite, boolean closeLastRegion) {
|
||||
boolean foundStart = false;
|
||||
final int windowHeaderStart = getStartLocation(windowHeader);
|
||||
int variantRegionStartIndex = 0;
|
||||
|
|
@ -215,22 +228,27 @@ public class SlidingWindow {
|
|||
}
|
||||
final int refStart = windowHeaderStart + variantRegionStartIndex;
|
||||
final int refStop = windowHeaderStart + to - 1;
|
||||
return (foundStart && forceClose) ? new SimpleGenomeLoc(contig, contigIndex, refStart, refStop, true) : null;
|
||||
return (foundStart && closeLastRegion) ? new SimpleGenomeLoc(contig, contigIndex, refStart, refStop, true) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a list with all the complete and incomplete variant regions within 'from' (inclusive) and 'to' (exclusive)
|
||||
*
|
||||
* @param from beginning window header index of the search window (inclusive)
|
||||
* @param to end window header index of the search window (exclusive)
|
||||
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates
|
||||
* @param to end window header index of the search window (exclusive); note that this uses local coordinates
|
||||
* @param variantSite boolean array with true marking variant regions
|
||||
* @return a list with start/stops of variant regions following findNextVariantRegion description
|
||||
* @return a list with start/stops of variant regions following findNextVariantRegion description in global coordinates
|
||||
*/
|
||||
private CompressionStash findVariantRegions(int from, int to, boolean[] variantSite, boolean forceClose) {
|
||||
@Requires({"from >= 0", "from <= to", "to <= variantSite.length"})
|
||||
@Ensures("result != null")
|
||||
protected CompressionStash findVariantRegions(int from, int to, boolean[] variantSite, boolean closeLastRegion) {
|
||||
final int windowHeaderStart = getStartLocation(windowHeader);
|
||||
|
||||
CompressionStash regions = new CompressionStash();
|
||||
int index = from;
|
||||
while(index < to) {
|
||||
SimpleGenomeLoc result = findNextVariantRegion(index, to, variantSite, forceClose);
|
||||
// returns results in global coordinates
|
||||
SimpleGenomeLoc result = findNextVariantRegion(index, to, variantSite, closeLastRegion);
|
||||
if (result == null)
|
||||
break;
|
||||
|
||||
|
|
@ -238,7 +256,7 @@ public class SlidingWindow {
|
|||
if (!result.isFinished())
|
||||
break;
|
||||
|
||||
index = result.getStop() + 1;
|
||||
index = result.getStop() - windowHeaderStart + 1; // go back to local coordinates
|
||||
}
|
||||
return regions;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -62,6 +64,7 @@ import java.util.Map;
|
|||
* Date: 1/27/13
|
||||
* Time: 11:16 AM
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
public class BaseCoverageDistribution extends LocusWalker<Integer, Map<Integer, Long>> {
|
||||
@Output(required = true)
|
||||
private PrintStream out;
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
|||
|
||||
@Override
|
||||
public GenomeLoc map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker tracker) {
|
||||
if (activeRegion.isActive)
|
||||
if (activeRegion.isActive())
|
||||
return activeRegion.getLocation();
|
||||
else
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -53,7 +53,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -288,7 +289,7 @@ public class ConsensusAlleleCounter {
|
|||
if (vcs.isEmpty())
|
||||
return Collections.emptyList(); // nothing else to do, no alleles passed minimum count criterion
|
||||
|
||||
final VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
|
||||
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(vcs, null, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
|
||||
return mergedVC.getAlleles();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACcounts;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.ExactACset;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
|
|||
|
|
@ -49,8 +49,8 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
|
||||
|
||||
|
|
@ -172,7 +172,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
|
||||
*/
|
||||
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Plody (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
|
||||
public int samplePloidy = VariantContextUtils.DEFAULT_PLOIDY;
|
||||
public int samplePloidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName="minqs", fullName="min_quality_score", doc="Min quality score to consider. Smaller numbers process faster. Default: Q1.", required=false)
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -304,7 +304,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
headerInfo.add(new VCFInfoHeaderLine(UnifiedGenotyperEngine.NUMBER_OF_DISCOVERED_ALLELES_KEY, 1, VCFHeaderLineType.Integer, "Number of alternate alleles discovered (but not necessarily genotyped) at this site"));
|
||||
|
||||
// add the pool values for each genotype
|
||||
if (UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY) {
|
||||
if (UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY) {
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -134,7 +135,7 @@ public class UnifiedGenotyperEngine {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Requires({"toolkit != null", "UAC != null"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY);
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
}
|
||||
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
|
||||
|
|
@ -525,7 +526,7 @@ public class UnifiedGenotyperEngine {
|
|||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
|
||||
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
vcCall = GATKVariantContextUtils.reverseTrimAlleles(vcCall);
|
||||
|
||||
if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
|
||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||
|
|
@ -662,7 +663,7 @@ public class UnifiedGenotyperEngine {
|
|||
private void determineGLModelsToUse() {
|
||||
|
||||
String modelPrefix = "";
|
||||
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY )
|
||||
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY )
|
||||
modelPrefix = GPSTRING;
|
||||
|
||||
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -105,7 +106,7 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
alleles.add(vc.getReference());
|
||||
alleles.addAll(chooseMostLikelyAlternateAlleles(vc, getMaxAltAlleles()));
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
builder.genotypes(GATKVariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
|
||||
return builder.make();
|
||||
} else {
|
||||
return vc;
|
||||
|
|
@ -351,6 +352,6 @@ public abstract class DiploidExactAFCalc extends ExactAFCalc {
|
|||
final List<Allele> allelesToUse,
|
||||
final boolean assignGenotypes,
|
||||
final int ploidy) {
|
||||
return VariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||
return GATKVariantContextUtils.subsetDiploidAlleles(vc, allelesToUse, assignGenotypes);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,10 +47,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
|
@ -92,7 +92,7 @@ abstract class ExactAFCalc extends AFCalc {
|
|||
if ( sample.hasLikelihoods() ) {
|
||||
double[] gls = sample.getLikelihoods().getAsVector();
|
||||
|
||||
if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
if ( MathUtils.sum(gls) < GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
genotypeLikelihoods.add(gls);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GeneralPloidyGenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
@ -553,7 +554,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
}
|
||||
|
||||
// if there is no mass on the (new) likelihoods, then just no-call the sample
|
||||
if ( MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL ) {
|
||||
if ( MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL ) {
|
||||
newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
|
||||
}
|
||||
else {
|
||||
|
|
@ -565,7 +566,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
gb.PL(newLikelihoods);
|
||||
|
||||
// if we weren't asked to assign a genotype, then just no-call the sample
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > VariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > GATKVariantContextUtils.SUM_GL_THRESH_NOCALL )
|
||||
gb.alleles(NO_CALL_ALLELES);
|
||||
else
|
||||
assignGenotype(gb, newLikelihoods, allelesToUse, ploidy);
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
|
|
@ -173,7 +174,7 @@ public class GenotypingEngine {
|
|||
validatePriorityList( priorityList, eventsAtThisLoc );
|
||||
|
||||
// Merge the event to find a common reference representation
|
||||
final VariantContext mergedVC = VariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||
final VariantContext mergedVC = GATKVariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||
if( mergedVC == null ) { continue; }
|
||||
|
||||
if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) {
|
||||
|
|
@ -203,7 +204,7 @@ public class GenotypingEngine {
|
|||
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
|
||||
|
||||
if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||
annotatedCall = VariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
}
|
||||
|
||||
returnCalls.add( annotatedCall );
|
||||
|
|
|
|||
|
|
@ -72,6 +72,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
|||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
@ -297,7 +298,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
samplesList.addAll( samples );
|
||||
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||
UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
|
||||
|
|
@ -307,7 +308,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
simpleUAC.CONTAMINATION_FRACTION = 0.0;
|
||||
simpleUAC.exactCallsLog = null;
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// initialize the output VCF header
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
|
|
@ -462,15 +463,19 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
allelesToGenotype.removeAll( activeAllelesToGenotype );
|
||||
}
|
||||
|
||||
if( !activeRegion.isActive ) { return 0; } // Not active so nothing to do!
|
||||
if( !activeRegion.isActive()) { return 0; } // Not active so nothing to do!
|
||||
if( activeRegion.size() == 0 && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { return 0; } // No reads here so nothing to do!
|
||||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && activeAllelesToGenotype.isEmpty() ) { return 0; } // No alleles found in this region so nothing to do!
|
||||
|
||||
finalizeActiveRegion( activeRegion ); // merge overlapping fragments, clip adapter and low qual tails
|
||||
|
||||
// note this operation must be performed before we clip the reads down, as this must correspond to the full reference region
|
||||
final GenomeLoc fullSpanBeforeClipping = getPaddedLoc(activeRegion);
|
||||
|
||||
final Haplotype referenceHaplotype = new Haplotype(activeRegion.getActiveRegionReference(referenceReader), true); // Create the reference haplotype which is the bases from the reference that make up the active region
|
||||
final byte[] fullReferenceWithPadding = activeRegion.getFullReference(referenceReader, REFERENCE_PADDING);
|
||||
//int PRUNE_FACTOR = Math.max(MIN_PRUNE_FACTOR, determinePruneFactorFromCoverage( activeRegion ));
|
||||
final ArrayList<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, getPaddedLoc(activeRegion), MIN_PRUNE_FACTOR, activeAllelesToGenotype );
|
||||
final ArrayList<Haplotype> haplotypes = assemblyEngine.runLocalAssembly( activeRegion, referenceHaplotype, fullReferenceWithPadding, fullSpanBeforeClipping, MIN_PRUNE_FACTOR, activeAllelesToGenotype );
|
||||
if( haplotypes.size() == 1 ) { return 1; } // only the reference haplotype remains so nothing else to do!
|
||||
|
||||
activeRegion.hardClipToActiveRegion(); // only evaluate the parts of reads that are overlapping the active region
|
||||
|
|
@ -494,7 +499,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
stratifiedReadMap,
|
||||
perSampleFilteredReadList,
|
||||
fullReferenceWithPadding,
|
||||
getPaddedLoc(activeRegion),
|
||||
fullSpanBeforeClipping,
|
||||
activeRegion.getLocation(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
activeAllelesToGenotype ) ) {
|
||||
|
|
@ -504,9 +509,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
|
||||
if ( bamWriter != null ) {
|
||||
// write the haplotypes to the bam
|
||||
final GenomeLoc paddedRefLoc = getPaddedLoc(activeRegion);
|
||||
for ( Haplotype haplotype : haplotypes )
|
||||
writeHaplotype(haplotype, paddedRefLoc, bestHaplotypes.contains(haplotype));
|
||||
writeHaplotype(haplotype, fullSpanBeforeClipping, bestHaplotypes.contains(haplotype));
|
||||
|
||||
// we need to remap the Alleles back to the Haplotypes; inefficient but unfortunately this is a requirement currently
|
||||
final Map<Allele, Haplotype> alleleToHaplotypeMap = new HashMap<Allele, Haplotype>(haplotypes.size());
|
||||
|
|
@ -518,7 +522,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : readAlleleLikelihoodMap.getLikelihoodReadMap().entrySet() ) {
|
||||
final Allele bestAllele = PerReadAlleleLikelihoodMap.getMostLikelyAllele(entry.getValue());
|
||||
if ( bestAllele != Allele.NO_CALL )
|
||||
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele), paddedRefLoc.getStart());
|
||||
writeReadAgainstHaplotype(entry.getKey(), alleleToHaplotypeMap.get(bestAllele), fullSpanBeforeClipping.getStart());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -558,7 +562,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
|
||||
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
|
||||
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) );
|
||||
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( activeRegion.getReads() );
|
||||
activeRegion.clearReads();
|
||||
|
||||
// Join overlapping paired reads to create a single longer read
|
||||
|
|
@ -570,17 +574,20 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
Collections.shuffle(finalizedReadList, GenomeAnalysisEngine.getRandomGenerator());
|
||||
|
||||
// Loop through the reads hard clipping the adaptor and low quality tails
|
||||
final ArrayList<GATKSAMRecord> readsToUse = new ArrayList<GATKSAMRecord>(finalizedReadList.size());
|
||||
for( final GATKSAMRecord myRead : finalizedReadList ) {
|
||||
final GATKSAMRecord postAdapterRead = ( myRead.getReadUnmappedFlag() ? myRead : ReadClipper.hardClipAdaptorSequence( myRead ) );
|
||||
if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
|
||||
final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
|
||||
// protect against INTERVALS with abnormally high coverage
|
||||
// BUGBUG: remove when positional downsampler is hooked up to ART/HC
|
||||
if( clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
|
||||
activeRegion.add(clippedRead);
|
||||
// TODO BUGBUG: remove when positional downsampler is hooked up to ART/HC
|
||||
if( activeRegion.readOverlapsRegion(clippedRead) &&
|
||||
clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
|
||||
readsToUse.add(clippedRead);
|
||||
}
|
||||
}
|
||||
}
|
||||
activeRegion.addAll(ReadUtils.sortReadsByCoordinate(readsToUse));
|
||||
}
|
||||
|
||||
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
|
|
@ -595,9 +602,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
}
|
||||
|
||||
private GenomeLoc getPaddedLoc( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
final int padLeft = Math.max(activeRegion.getReferenceLoc().getStart()-REFERENCE_PADDING, 1);
|
||||
final int padRight = Math.min(activeRegion.getReferenceLoc().getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(activeRegion.getReferenceLoc().getContig()).getSequenceLength());
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getReferenceLoc().getContig(), padLeft, padRight);
|
||||
final int padLeft = Math.max(activeRegion.getReadSpanLoc().getStart()-REFERENCE_PADDING, 1);
|
||||
final int padRight = Math.min(activeRegion.getReadSpanLoc().getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(activeRegion.getReadSpanLoc().getContig()).getSequenceLength());
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(activeRegion.getReadSpanLoc().getContig(), padLeft, padRight);
|
||||
}
|
||||
|
||||
private HashMap<String, ArrayList<GATKSAMRecord>> splitReadsBySample( final List<GATKSAMRecord> reads ) {
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ import org.broadinstitute.sting.gatk.walkers.BAQMode;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
|
|
@ -444,7 +445,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
|
|||
ArrayList<String> rodNames = new ArrayList<String>();
|
||||
rodNames.add(variantCollection.variants.getName());
|
||||
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
|
||||
//Get the trios from the families passed as ped
|
||||
setTrios();
|
||||
|
|
|
|||
|
|
@ -58,12 +58,12 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.VCFUtils;
|
||||
|
||||
|
|
@ -327,7 +327,7 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
// Initialize VCF header
|
||||
if (vcfWriter != null) {
|
||||
Map<String, VCFHeader> header = GATKVCFUtils.getVCFHeadersFromRodPrefix(getToolkit(), alleles.getName());
|
||||
samples = SampleUtils.getSampleList(header, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
samples = SampleUtils.getSampleList(header, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(header.values(), true);
|
||||
headerLines.add(new VCFHeaderLine("source", "GenotypeAndValidate"));
|
||||
vcfWriter.writeHeader(new VCFHeader(headerLines, samples));
|
||||
|
|
|
|||
|
|
@ -54,12 +54,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -227,7 +227,7 @@ public class ValidationSiteSelector extends RodWalker<Integer, Integer> {
|
|||
public void initialize() {
|
||||
// Get list of samples to include in the output
|
||||
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit());
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
|
||||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
|
@ -115,7 +116,7 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
|
|||
|
||||
String trackName = variantCollection.variants.getName();
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import com.google.java.contract.Requires;
|
|||
import org.apache.commons.math.MathException;
|
||||
import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Collection;
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
|||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,45 +1,45 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
|
@ -51,9 +51,9 @@ import com.google.java.contract.Requires;
|
|||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -112,7 +112,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
|
|||
|
||||
// get backward repeat unit and # repeats
|
||||
byte[] backwardRepeatUnit = Arrays.copyOfRange(readBases, offset - str + 1, offset + 1);
|
||||
maxBW = VariantContextUtils.findNumberofRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxBW = GATKVariantContextUtils.findNumberofRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
if (maxBW > 1) {
|
||||
bestBWRepeatUnit = backwardRepeatUnit.clone();
|
||||
break;
|
||||
|
|
@ -132,7 +132,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
|
|||
|
||||
// get forward repeat unit and # repeats
|
||||
byte[] forwardRepeatUnit = Arrays.copyOfRange(readBases, offset +1, offset+str+1);
|
||||
maxFW = VariantContextUtils.findNumberofRepetitions(forwardRepeatUnit,Arrays.copyOfRange(readBases, offset+1, readBases.length), true);
|
||||
maxFW = GATKVariantContextUtils.findNumberofRepetitions(forwardRepeatUnit, Arrays.copyOfRange(readBases, offset + 1, readBases.length), true);
|
||||
if (maxFW > 1) {
|
||||
bestFWRepeatUnit = forwardRepeatUnit.clone();
|
||||
break;
|
||||
|
|
@ -150,7 +150,7 @@ public abstract class RepeatCovariate implements ExperimentalCovariate {
|
|||
// but correct representation at that place might be (C)4.
|
||||
// Hence, if the FW and BW units don't match, check if BW unit can still be a part of FW unit and add
|
||||
// representations to total
|
||||
maxBW = VariantContextUtils.findNumberofRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxBW = GATKVariantContextUtils.findNumberofRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxRL = maxFW + maxBW;
|
||||
bestRepeatUnit = bestFWRepeatUnit;
|
||||
|
||||
|
|
|
|||
|
|
@ -48,18 +48,6 @@ package org.broadinstitute.sting.utils.recalibration.covariates;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
public class RepeatUnitAndLengthCovariate extends RepeatCovariate {
|
||||
|
|
|
|||
|
|
@ -1,65 +1,51 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
|
|
|
|||
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class HeaderElementUnitTest extends BaseTest {
|
||||
|
||||
private class HETest {
|
||||
public byte base, baseQual, insQual, delQual;
|
||||
public int MQ;
|
||||
public boolean isClip;
|
||||
|
||||
private HETest(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int MQ, final boolean isClip) {
|
||||
this.base = base;
|
||||
this.baseQual = baseQual;
|
||||
this.insQual = insQual;
|
||||
this.delQual = delQual;
|
||||
this.MQ = MQ;
|
||||
this.isClip = isClip;
|
||||
}
|
||||
}
|
||||
|
||||
private static final byte byteA = (byte)'A';
|
||||
private static final byte byte10 = (byte)10;
|
||||
private static final byte byte20 = (byte)20;
|
||||
private static final int minBaseQual = 20;
|
||||
private static final int minMappingQual = 20;
|
||||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{new HETest(byteA, byte20, byte20, byte20, 20, false)});
|
||||
tests.add(new Object[]{new HETest(byteA, byte10, byte20, byte20, 20, false)});
|
||||
tests.add(new Object[]{new HETest(byteA, byte20, byte20, byte20, 10, false)});
|
||||
tests.add(new Object[]{new HETest(byteA, byte20, byte20, byte20, 20, true)});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "data", enabled = true)
|
||||
public void testHE(HETest test) {
|
||||
|
||||
HeaderElement headerElement = new HeaderElement(1000, 0);
|
||||
|
||||
// first test that if we add and then remove it, we have no data
|
||||
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip);
|
||||
headerElement.addInsertionToTheRight();
|
||||
headerElement.removeBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip);
|
||||
headerElement.removeInsertionToTheRight();
|
||||
testHeaderIsEmpty(headerElement);
|
||||
|
||||
// now, test that the data was added as expected
|
||||
for ( int i = 0; i < 10; i++ )
|
||||
headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip);
|
||||
testHeaderData(headerElement, test);
|
||||
|
||||
// test the insertion adding functionality
|
||||
for ( int i = 0; i < 10; i++ )
|
||||
headerElement.addInsertionToTheRight();
|
||||
Assert.assertEquals(headerElement.numInsertionsToTheRight(), 10);
|
||||
}
|
||||
|
||||
private void testHeaderIsEmpty(final HeaderElement headerElement) {
|
||||
Assert.assertFalse(headerElement.hasConsensusData());
|
||||
Assert.assertFalse(headerElement.hasFilteredData());
|
||||
Assert.assertFalse(headerElement.hasInsertionToTheRight());
|
||||
Assert.assertTrue(headerElement.isEmpty());
|
||||
Assert.assertEquals(headerElement.getRMS(), 0.0);
|
||||
}
|
||||
|
||||
private void testHeaderData(final HeaderElement headerElement, final HETest test) {
|
||||
Assert.assertEquals(headerElement.getRMS(), (double)test.MQ);
|
||||
Assert.assertEquals(headerElement.isVariantFromSoftClips(), test.isClip);
|
||||
Assert.assertFalse(headerElement.isEmpty());
|
||||
Assert.assertFalse(headerElement.hasInsertionToTheRight());
|
||||
Assert.assertEquals(headerElement.hasConsensusData(), headerElement.basePassesFilters(test.baseQual, minBaseQual, test.MQ, minMappingQual));
|
||||
Assert.assertEquals(headerElement.hasFilteredData(), !headerElement.basePassesFilters(test.baseQual, minBaseQual, test.MQ, minMappingQual));
|
||||
Assert.assertFalse(headerElement.isVariantFromMismatches(0.05));
|
||||
Assert.assertEquals(headerElement.isVariant(0.05, 0.05), test.isClip);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class SimpleGenomeLocUnitTest extends BaseTest {
|
||||
|
||||
private static final SimpleGenomeLoc loc1 = new SimpleGenomeLoc("1", 0, 10, 20, false);
|
||||
private static final SimpleGenomeLoc loc2 = new SimpleGenomeLoc("1", 0, 21, 30, false);
|
||||
private static final SimpleGenomeLoc loc3 = new SimpleGenomeLoc("1", 0, 31, 40, false);
|
||||
|
||||
private class SGLTest {
|
||||
public List<SimpleGenomeLoc> locs;
|
||||
|
||||
private SGLTest(final List<SimpleGenomeLoc> locs) {
|
||||
this.locs = locs;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "SGLtest")
|
||||
public Object[][] createFindVariantRegionsData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{new SGLTest(Arrays.<SimpleGenomeLoc>asList(loc1))});
|
||||
tests.add(new Object[]{new SGLTest(Arrays.<SimpleGenomeLoc>asList(loc1, loc2))});
|
||||
tests.add(new Object[]{new SGLTest(Arrays.<SimpleGenomeLoc>asList(loc1, loc2, loc3))});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "SGLtest", enabled = true)
|
||||
public void testSimpleGenomeLoc(SGLTest test) {
|
||||
testMerge(test.locs);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testNotContiguousLocs() {
|
||||
final List<SimpleGenomeLoc> locs = new ArrayList<SimpleGenomeLoc>(1);
|
||||
locs.add(loc1);
|
||||
locs.add(loc3);
|
||||
testMerge(locs);
|
||||
}
|
||||
|
||||
private void testMerge(final List<SimpleGenomeLoc> locs) {
|
||||
SimpleGenomeLoc result1 = locs.get(0);
|
||||
for ( int i = 1; i < locs.size(); i++ )
|
||||
result1 = SimpleGenomeLoc.merge(result1, locs.get(i));
|
||||
|
||||
SimpleGenomeLoc result2 = SimpleGenomeLoc.merge(new TreeSet<SimpleGenomeLoc>(locs));
|
||||
Assert.assertEquals(result1, result2);
|
||||
Assert.assertEquals(result1.getStart(), locs.get(0).getStart());
|
||||
Assert.assertEquals(result1.getStop(), locs.get(locs.size() - 1).getStop());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,248 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class SlidingWindowUnitTest extends BaseTest {
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
//// This section tests the findVariantRegions() method and related functionality ////
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
private static final int variantRegionLength = 1000;
|
||||
private static final int globalStartPosition = 1000000;
|
||||
private static final SimpleGenomeLoc loc90to95 = new SimpleGenomeLoc("1", 0, 1000090, 1000095, false);
|
||||
private static final SimpleGenomeLoc loc96to99 = new SimpleGenomeLoc("1", 0, 1000096, 1000099, false);
|
||||
private static final SimpleGenomeLoc loc100to110 = new SimpleGenomeLoc("1", 0, 1000100, 1000110, false);
|
||||
private static final SimpleGenomeLoc loc999 = new SimpleGenomeLoc("1", 0, 1000999, 1000999, false);
|
||||
|
||||
private class FindVariantRegionsTest {
|
||||
public List<SimpleGenomeLoc> locs, expectedResult;
|
||||
public boolean[] variantRegionBitset;
|
||||
|
||||
private FindVariantRegionsTest(final List<SimpleGenomeLoc> locs) {
|
||||
this.locs = locs;
|
||||
this.expectedResult = locs;
|
||||
variantRegionBitset = createBitset(locs);
|
||||
}
|
||||
|
||||
private FindVariantRegionsTest(final List<SimpleGenomeLoc> locs, final List<SimpleGenomeLoc> expectedResult) {
|
||||
this.locs = locs;
|
||||
this.expectedResult = expectedResult;
|
||||
variantRegionBitset = createBitset(locs);
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean[] createBitset(final List<SimpleGenomeLoc> locs) {
|
||||
boolean[] variantRegionBitset = new boolean[variantRegionLength];
|
||||
for ( SimpleGenomeLoc loc : locs ) {
|
||||
final int stop = loc.getStop() - globalStartPosition;
|
||||
for ( int i = loc.getStart() - globalStartPosition; i <= stop; i++ )
|
||||
variantRegionBitset[i] = true;
|
||||
}
|
||||
return variantRegionBitset;
|
||||
}
|
||||
|
||||
@DataProvider(name = "findVariantRegions")
|
||||
public Object[][] createFindVariantRegionsData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{new FindVariantRegionsTest(Arrays.<SimpleGenomeLoc>asList(loc90to95))});
|
||||
tests.add(new Object[]{new FindVariantRegionsTest(Arrays.<SimpleGenomeLoc>asList(loc90to95, loc100to110))});
|
||||
tests.add(new Object[]{new FindVariantRegionsTest(Arrays.<SimpleGenomeLoc>asList(loc90to95, loc96to99, loc100to110), Arrays.<SimpleGenomeLoc>asList(new SimpleGenomeLoc("1", 0, 1000090, 1000110, false)))});
|
||||
tests.add(new Object[]{new FindVariantRegionsTest(Arrays.<SimpleGenomeLoc>asList(loc90to95, loc999))});
|
||||
tests.add(new Object[]{new FindVariantRegionsTest(Arrays.<SimpleGenomeLoc>asList(loc999))});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "findVariantRegions", enabled = true)
|
||||
public void testFindVariantRegions(FindVariantRegionsTest test) {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, globalStartPosition);
|
||||
final CompressionStash locs = slidingWindow.findVariantRegions(0, variantRegionLength, test.variantRegionBitset, true);
|
||||
int index = 0;
|
||||
for ( final SimpleGenomeLoc loc : locs ) {
|
||||
Assert.assertTrue(loc.equals(test.expectedResult.get(index++)));
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testNoClosingRegions() {
|
||||
final SlidingWindow slidingWindow = new SlidingWindow("1", 0, globalStartPosition);
|
||||
final CompressionStash locs = slidingWindow.findVariantRegions(0, variantRegionLength, createBitset(Arrays.<SimpleGenomeLoc>asList(loc90to95, loc999)), false);
|
||||
Assert.assertEquals(locs.size(), 1);
|
||||
Assert.assertEquals(locs.iterator().next(), loc90to95);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
|
||||
private static class DownsamplingReadsIteratorTest extends TestDataProvider {
|
||||
private DownsamplingReadsIterator downsamplingIter;
|
||||
private int targetCoverage;
|
||||
private ArtificialSingleSampleReadStream stream;
|
||||
private ArtificialSingleSampleReadStreamAnalyzer streamAnalyzer;
|
||||
|
||||
public DownsamplingReadsIteratorTest( ArtificialSingleSampleReadStream stream, int targetCoverage ) {
|
||||
super(DownsamplingReadsIteratorTest.class);
|
||||
|
||||
this.stream = stream;
|
||||
this.targetCoverage = targetCoverage;
|
||||
|
||||
setName(String.format("%s: targetCoverage=%d numContigs=%d stacksPerContig=%d readsPerStack=%d-%d distanceBetweenStacks=%d-%d readLength=%d-%d unmappedReads=%d",
|
||||
getClass().getSimpleName(),
|
||||
targetCoverage,
|
||||
stream.getNumContigs(),
|
||||
stream.getNumStacksPerContig(),
|
||||
stream.getMinReadsPerStack(),
|
||||
stream.getMaxReadsPerStack(),
|
||||
stream.getMinDistanceBetweenStacks(),
|
||||
stream.getMaxDistanceBetweenStacks(),
|
||||
stream.getMinReadLength(),
|
||||
stream.getMaxReadLength(),
|
||||
stream.getNumUnmappedReads()));
|
||||
}
|
||||
|
||||
public void run() {
|
||||
streamAnalyzer = new PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer(stream, targetCoverage);
|
||||
downsamplingIter = new DownsamplingReadsIterator(stream.getStingSAMIterator(), new SimplePositionalDownsampler<SAMRecord>(targetCoverage));
|
||||
|
||||
streamAnalyzer.analyze(downsamplingIter);
|
||||
|
||||
// Check whether the observed properties of the downsampled stream are what they should be
|
||||
streamAnalyzer.validate();
|
||||
|
||||
// Allow memory used by this test to be reclaimed
|
||||
stream = null;
|
||||
streamAnalyzer = null;
|
||||
downsamplingIter = null;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "DownsamplingReadsIteratorTestDataProvider")
|
||||
public Object[][] createDownsamplingReadsIteratorTests() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(5, 1, 10000);
|
||||
String readGroupID = "testReadGroup";
|
||||
SAMReadGroupRecord readGroup = new SAMReadGroupRecord(readGroupID);
|
||||
readGroup.setSample("testSample");
|
||||
header.addReadGroup(readGroup);
|
||||
|
||||
// Values that don't vary across tests
|
||||
int targetCoverage = 10;
|
||||
int minReadLength = 50;
|
||||
int maxReadLength = 100;
|
||||
int minDistanceBetweenStacks = 1;
|
||||
int maxDistanceBetweenStacks = maxReadLength + 1;
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
|
||||
// brute force testing!
|
||||
for ( int numContigs : Arrays.asList(1, 2, 5) ) {
|
||||
for ( int stacksPerContig : Arrays.asList(1, 2, 10) ) {
|
||||
for ( int minReadsPerStack : Arrays.asList(1, targetCoverage / 2, targetCoverage, targetCoverage - 1, targetCoverage + 1, targetCoverage * 2) ) {
|
||||
for ( int maxReadsPerStack : Arrays.asList(1, targetCoverage / 2, targetCoverage, targetCoverage - 1, targetCoverage + 1, targetCoverage * 2) ) {
|
||||
for ( int numUnmappedReads : Arrays.asList(0, 1, targetCoverage, targetCoverage * 2) ) {
|
||||
// Only interested in sane read stream configurations here
|
||||
if ( minReadsPerStack <= maxReadsPerStack ) {
|
||||
new DownsamplingReadsIteratorTest(new ArtificialSingleSampleReadStream(header,
|
||||
readGroupID,
|
||||
numContigs,
|
||||
stacksPerContig,
|
||||
minReadsPerStack,
|
||||
maxReadsPerStack,
|
||||
minDistanceBetweenStacks,
|
||||
maxDistanceBetweenStacks,
|
||||
minReadLength,
|
||||
maxReadLength,
|
||||
numUnmappedReads),
|
||||
targetCoverage);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return DownsamplingReadsIteratorTest.getTests(DownsamplingReadsIteratorTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "DownsamplingReadsIteratorTestDataProvider")
|
||||
public void runDownsamplingReadsIteratorTest( DownsamplingReadsIteratorTest test ) {
|
||||
logger.warn("Running test: " + test);
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
test.run();
|
||||
}
|
||||
|
||||
*/
|
||||
}
|
||||
|
|
@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.testng.Assert;
|
||||
|
|
|
|||
|
|
@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "11290b619bc79b629cf29b8f428254ce");
|
||||
HCTest(CEUTRIO_BAM, "", "664a14590d0966e63d3aabff2d7bab0a");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "897abb2b4f98e9e460f373f9e0db5033");
|
||||
HCTest(NA12878_BAM, "", "111f3dc086a3cea1be9bd1ad6e1d18ed");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
|
|
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"efc2cae94069a1d6ee5fdcc7afeaa0ed");
|
||||
"c70f753f7918a1c670ce4ed5c66de09e");
|
||||
}
|
||||
|
||||
private void HCTestComplexGGA(String bam, String args, String md5) {
|
||||
|
|
@ -96,13 +96,13 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"01f42c311fc3ce4f07ef86f8c01facfb");
|
||||
"b1d3070f0c49becf34101e480ab6c589");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"4c117c84d1abeade1dee3f7b52a4a585");
|
||||
"20eba2e54266f6aebf35b7b7b7e754e3");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -113,7 +113,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "939847eb7bbafc798916acffdb1b5697");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "f9805488d85e59e1ae002d0d32d7011d");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -124,7 +124,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "25806874242973f00fb6f2a320ed4d9c");
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "4544a2916f46f58b32db8008776b91a3");
|
||||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
|
|
@ -135,7 +135,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "c50b06d56cf3d0ef53e73a4973207949");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f3984a91e7562494c2a7e41fd05a6734");
|
||||
}
|
||||
|
||||
// That problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||
|
|
@ -146,14 +146,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ae2470e294d99ff2b825281b84730c72"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3e9e025c539be6c5e0d0f2e5d8e86a62"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("6f18ae64bf466476d780a083dcb5fc43"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("34129e6c6310ef4eeeeb59b0e7ac0464"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -175,7 +175,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("ecdb8e30ec5dd91efc179ab6732499f9"));
|
||||
Arrays.asList("5f4c07aaf1d2d34cccce43196a5fbd71"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||
Arrays.asList("36a90309dde1a325c274388e302ffaa5"));
|
||||
Arrays.asList("6ead001b1f8e7cb433fd335f78fde5f0"));
|
||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
|
|||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
|
|
|||
|
|
@ -1,66 +1,63 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.*;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
|
@ -89,38 +86,38 @@ public class RepeatCovariatesUnitTest {
|
|||
@Test(enabled = true)
|
||||
public void testFindNumberOfRepetitions() {
|
||||
// First, test logic to compute number of repetitions of a substring on a given string.
|
||||
int result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), true);
|
||||
int result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), true);
|
||||
Assert.assertEquals(2,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACAC".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), true);
|
||||
Assert.assertEquals(4,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACACGT".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), true);
|
||||
Assert.assertEquals(4,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"GTACACACAC".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGT".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GCAGCAGTAGGGTGTACACACAC".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), true);
|
||||
Assert.assertEquals(1,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GTAGGGTGTACACACACGCAGCAT".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), true);
|
||||
Assert.assertEquals(0,result);
|
||||
// Same tests but looking backward on string
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACAC".getBytes(), false);
|
||||
Assert.assertEquals(2,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACAC".getBytes(), false);
|
||||
Assert.assertEquals(4,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"ACACACACGT".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "ACACACACGT".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("AC".getBytes(),"GTACACACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("AC".getBytes(), "GTACACACAC".getBytes(), false);
|
||||
Assert.assertEquals(4,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGT".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGT".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GCAGCAGTAGGGTGTACACACAC".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GCAGCAGTAGGGTGTACACACAC".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(),"GTAGGGTGTACACACACGCAGCAT".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCAGCA".getBytes(), "GTAGGGTGTACACACACGCAGCAT".getBytes(), false);
|
||||
Assert.assertEquals(0,result);
|
||||
result = VariantContextUtils.findNumberofRepetitions("GCA".getBytes(),"GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false);
|
||||
result = GATKVariantContextUtils.findNumberofRepetitions("GCA".getBytes(), "GTAGGGTGTACACACACGCAGCAGCA".getBytes(), false);
|
||||
Assert.assertEquals(3,result);
|
||||
|
||||
// test logic to get repeat unit and number of repeats from covariate value
|
||||
|
|
@ -208,8 +205,8 @@ public class RepeatCovariatesUnitTest {
|
|||
Assert.assertEquals(rurlValM,rurlValI);
|
||||
|
||||
|
||||
int fw = VariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(offset+1,readLength).getBytes(),true);
|
||||
int bw = VariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(0,offset+1).getBytes(),false);
|
||||
int fw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(offset+1,readLength).getBytes(),true);
|
||||
int bw = GATKVariantContextUtils.findNumberofRepetitions(ruValM.getBytes(), readBases.substring(0,offset+1).getBytes(),false);
|
||||
Assert.assertEquals(Math.min(fw+bw,RepeatCovariate.MAX_REPEAT_LENGTH),(int)Integer.valueOf(rlValM));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -200,7 +201,7 @@ public class VariantContextAdaptors {
|
|||
if ( isSNP(dbsnp) || isMNP(dbsnp) )
|
||||
addPaddingBase = false;
|
||||
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
|
||||
addPaddingBase = refBaseIsDash || VariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
|
||||
addPaddingBase = refBaseIsDash || GATKVariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
|
||||
else
|
||||
return null; // can't handle anything else
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
|||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.file.FSLockWithShared;
|
||||
|
|
|
|||
|
|
@ -460,7 +460,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
printIGVFormatRow(walker.activeRegionOutStream, region.getLocation().getStartLocation(),
|
||||
"end-marker", 0.0);
|
||||
printIGVFormatRow(walker.activeRegionOutStream, region.getLocation(),
|
||||
"size=" + region.getLocation().size(), region.isActive ? 1.0 : -1.0);
|
||||
"size=" + region.getLocation().size(), region.isActive() ? 1.0 : -1.0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -541,12 +541,12 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
}
|
||||
|
||||
if ( logger.isDebugEnabled() ) {
|
||||
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
|
||||
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive() ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReadSpanLoc());
|
||||
}
|
||||
|
||||
if ( LOG_READ_CARRYING )
|
||||
logger.info(String.format("Processing region %20s span=%3d active?=%5b with %4d reads. Overall max reads carried is %s",
|
||||
activeRegion.getLocation(), activeRegion.getLocation().size(), activeRegion.isActive, activeRegion.size(), maxReadsInMemory));
|
||||
activeRegion.getLocation(), activeRegion.getLocation().size(), activeRegion.isActive(), activeRegion.size(), maxReadsInMemory));
|
||||
|
||||
final M x = walker.map(activeRegion, null);
|
||||
return walker.reduce( x, sum );
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.samples.Sample;
|
|||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.recalibration.BQSRMode;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
|
@ -249,7 +250,7 @@ public class VariantAnnotatorEngine {
|
|||
private VariantContext annotateDBs(final RefMetaDataTracker tracker, final GenomeLoc loc, VariantContext vc, final Map<String, Object> infoAnnotations) {
|
||||
for ( Map.Entry<RodBinding<VariantContext>, String> dbSet : dbAnnotations.entrySet() ) {
|
||||
if ( dbSet.getValue().equals(VCFConstants.DBSNP_KEY) ) {
|
||||
final String rsID = VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType());
|
||||
final String rsID = GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbSet.getKey(), loc), vc.getType());
|
||||
|
||||
// add the ID if appropriate
|
||||
if ( rsID != null ) {
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.util.Collections;
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.utils.clipping.ClippingOp;
|
||||
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.VariantEvalUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -197,7 +198,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||
|
||||
@Argument(shortName="ploidy", fullName="samplePloidy", doc="Per-sample ploidy (number of chromosomes per sample)", required=false)
|
||||
protected int ploidy = VariantContextUtils.DEFAULT_PLOIDY;
|
||||
protected int ploidy = GATKVariantContextUtils.DEFAULT_PLOIDY;
|
||||
|
||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||
private File ancestralAlignmentsFile = null;
|
||||
|
|
@ -285,7 +286,7 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
|
||||
// Now that we have all the rods categorized, determine the sample list from the eval rods.
|
||||
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), evals);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
Set<String> vcfSamples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
|
||||
// Load the sample list, using an intermediate tree set to sort the samples
|
||||
final Set<String> allSampleNames = SampleUtils.getSamplesFromCommandLineInput(vcfSamples);
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.AnalysisModuleScanner;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
|
||||
private boolean overlapsKnownCNV(VariantContext cnv) {
|
||||
if ( knownCNVs != null ) {
|
||||
final GenomeLoc loc = getWalker().getToolkit().getGenomeLocParser().createGenomeLoc(cnv, true);
|
||||
final GenomeLoc loc = getWalker().getToolkit().getGenomeLocParser().createGenomeLoc(cnv);
|
||||
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
|
||||
|
||||
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class IntervalStratification extends VariantStratifier {
|
|||
|
||||
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
if (eval != null) {
|
||||
final GenomeLoc loc = getVariantEvalWalker().getToolkit().getGenomeLocParser().createGenomeLoc(eval, true);
|
||||
final GenomeLoc loc = getVariantEvalWalker().getToolkit().getGenomeLocParser().createGenomeLoc(eval);
|
||||
IntervalTree<GenomeLoc> intervalTree = intervalTreeByContig.get(loc.getContig());
|
||||
IntervalTree.Node<GenomeLoc> node = intervalTree.minOverlapper(loc.getStart(), loc.getStop());
|
||||
//logger.info(String.format("Overlap %s found %s", loc, node));
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -51,7 +51,7 @@ public class TandemRepeat extends VariantStratifier {
|
|||
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
if ( eval == null || ! eval.isIndel() )
|
||||
return ALL;
|
||||
else if ( VariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
|
||||
else if ( GATKVariantContextUtils.isTandemRepeat(eval, ref.getForwardBases()) ) {
|
||||
print("REPEAT", eval, ref);
|
||||
return REPEAT;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manage
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.gatk.walkers.Window;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -135,14 +136,14 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
protected VariantContextWriter vcfWriter = null;
|
||||
|
||||
@Argument(shortName="genotypeMergeOptions", doc="Determines how we should merge genotype records for samples shared across the ROD files", required=false)
|
||||
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = null;
|
||||
public GATKVariantContextUtils.GenotypeMergeType genotypeMergeOption = null;
|
||||
|
||||
@Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false)
|
||||
public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
|
||||
public GATKVariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName="multipleAllelesMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required=false)
|
||||
public VariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = VariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
|
||||
public GATKVariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
|
||||
|
||||
/**
|
||||
* Used when taking the union of variants that contain genotypes. A complete priority list MUST be provided.
|
||||
|
|
@ -203,12 +204,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
|
||||
validateAnnotateUnionArguments();
|
||||
if ( PRIORITY_STRING == null && genotypeMergeOption == null) {
|
||||
genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED;
|
||||
genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED;
|
||||
//PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12)
|
||||
logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority);
|
||||
}
|
||||
|
||||
if (genotypeMergeOption == VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE &&
|
||||
if (genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE &&
|
||||
!SampleUtils.verifyUniqueSamplesNames(vcfRods))
|
||||
throw new IllegalStateException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered.");
|
||||
|
||||
|
|
@ -232,7 +233,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
private void validateAnnotateUnionArguments() {
|
||||
Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null);
|
||||
|
||||
if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
|
||||
if ( genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
|
||||
throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes");
|
||||
|
||||
if ( PRIORITY_STRING != null){
|
||||
|
|
@ -278,7 +279,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
|
||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||
|
||||
if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
|
||||
if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||
|
||||
// TODO -- clean this up in a refactoring
|
||||
|
|
@ -296,13 +297,13 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
// iterate over the types so that it's deterministic
|
||||
for (VariantContext.Type type : VariantContext.Type.values()) {
|
||||
if (VCsByType.containsKey(type))
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(VCsByType.get(type),
|
||||
priority, rodNames.size() , filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
mergedVCs.add(GATKVariantContextUtils.simpleMerge(VCsByType.get(type),
|
||||
priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
}
|
||||
else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(vcs,
|
||||
else if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
|
||||
mergedVCs.add(GATKVariantContextUtils.simpleMerge(vcs,
|
||||
priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
|
|
|||
|
|
@ -39,12 +39,12 @@ import org.broadinstitute.sting.utils.SampleUtils;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -204,7 +204,7 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
|
|||
}
|
||||
}
|
||||
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
VCFHeader vcfHeader = new VCFHeader(headerLines, vcfSamples);
|
||||
vcfHeader.setWriteEngineHeaders(includeEngineHeaders);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.MendelianViolation;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -337,7 +338,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
List<String> rodNames = Arrays.asList(variantCollection.variants.getName());
|
||||
|
||||
vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), rodNames);
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
|
||||
Collection<String> samplesFromFile = SampleUtils.getSamplesFromFiles(sampleFiles);
|
||||
Collection<String> samplesFromExpressions = SampleUtils.matchSamplesExpressions(vcfSamples, sampleExpressions);
|
||||
|
|
@ -661,7 +662,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs and AD (because they are no longer accurate)
|
||||
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||
newGC = VariantContextUtils.stripPLsAndAD(sub.getGenotypes());
|
||||
newGC = GATKVariantContextUtils.stripPLsAndAD(sub.getGenotypes());
|
||||
|
||||
// if we have fewer samples in the selected VC than in the original VC, we need to strip out the MLE tags
|
||||
if ( vc.getNSamples() != sub.getNSamples() ) {
|
||||
|
|
|
|||
|
|
@ -35,13 +35,13 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -256,7 +256,7 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
|
|||
//if ( popFile != null ) {
|
||||
// throw new StingException("We still need to implement this!");
|
||||
//} else {
|
||||
return VariantContextUtils.computeHardyWeinbergPvalue(vc);
|
||||
return GATKVariantContextUtils.computeHardyWeinbergPvalue(vc);
|
||||
//}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.*;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -180,7 +181,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
|
||||
if ( !genotypeFieldsToTake.isEmpty() ) {
|
||||
Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants);
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||
samples.addAll(vcfSamples);
|
||||
|
||||
// optimization: if there are no samples, we don't have to worry about any genotype fields
|
||||
|
|
|
|||
|
|
@ -119,7 +119,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) )
|
||||
return 0;
|
||||
|
||||
String rsID = dbsnp == null ? null : VCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP);
|
||||
String rsID = dbsnp == null ? null : GATKVCFUtils.rsIDOfFirstRealVariant(tracker.getValues(dbsnp.dbsnp, context.getLocation()), VariantContext.Type.SNP);
|
||||
|
||||
Collection<VariantContext> contexts = getVariantContexts(tracker, ref);
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.commandline.Input;
|
|||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -51,7 +51,7 @@ import java.util.*;
|
|||
|
||||
/**
|
||||
*
|
||||
* Usage: java -cp dist/GenomeAnalysisTK.jar org.broadinstitute.sting.tools.AppendVariants <reference> <input VCF or BCF files> <outputFile> [sorted (optional)]");
|
||||
* Usage: java -cp dist/GenomeAnalysisTK.jar org.broadinstitute.sting.tools.CatVariants <reference> <input VCF or BCF files> <outputFile> [sorted (optional)]");
|
||||
* The input files can be of type: VCF (ends in .vcf or .VCF)");
|
||||
* BCF2 (ends in .bcf or .BCF)");
|
||||
* Output file must be vcf or bcf file (.vcf or .bcf)");
|
||||
|
|
|
|||
|
|
@ -530,4 +530,55 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
final int cmp = this.compareTo(other);
|
||||
return cmp == -1 ? other : this;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc from an existing loc, with a new start position
|
||||
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
||||
* set the start of a new GenomeLoc pertaining to a read that goes off the end of the contig.
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param start a new start position
|
||||
*
|
||||
* @return a newly allocated GenomeLoc as loc but with start == start
|
||||
*/
|
||||
public GenomeLoc setStart(GenomeLoc loc, int start) {
|
||||
return new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop());
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc from an existing loc, with a new stop position
|
||||
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
||||
* set the stop of a new GenomeLoc pertaining to a read that goes off the end of the contig.
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param stop a new stop position
|
||||
*
|
||||
* @return a newly allocated GenomeLoc as loc but with stop == stop
|
||||
*/
|
||||
public GenomeLoc setStop(GenomeLoc loc, int stop) {
|
||||
return new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* return a new genome loc, with an incremented position
|
||||
*
|
||||
* @param loc the old location
|
||||
*
|
||||
* @return a newly allocated GenomeLoc as loc but with start == loc.getStart() + 1
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc) {
|
||||
return incPos(loc, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* return a new genome loc, with an incremented position
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param by how much to move the start and stop by
|
||||
*
|
||||
* @return a newly allocated GenomeLoc as loc but with start == loc.getStart() + by
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc, int by) {
|
||||
return new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,10 +34,8 @@ import net.sf.samtools.SAMSequenceDictionary;
|
|||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
/**
|
||||
* Factory class for creating GenomeLocs
|
||||
|
|
@ -45,6 +43,16 @@ import org.broadinstitute.variant.variantcontext.VariantContext;
|
|||
public final class GenomeLocParser {
|
||||
private static Logger logger = Logger.getLogger(GenomeLocParser.class);
|
||||
|
||||
/**
|
||||
* How much validation should we do at runtime with this parser?
|
||||
*/
|
||||
public enum ValidationLevel {
|
||||
/** Do the standard amount of validation */
|
||||
STANDARD,
|
||||
/** Don't do any real checking at all */
|
||||
NONE
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Ugly global variable defining the optional ordering of contig elements
|
||||
|
|
@ -58,120 +66,28 @@ public final class GenomeLocParser {
|
|||
final private SAMSequenceDictionary SINGLE_MASTER_SEQUENCE_DICTIONARY;
|
||||
|
||||
/**
|
||||
* A thread-local caching contig info
|
||||
* A thread-local CachingSequenceDictionary
|
||||
*/
|
||||
private final ThreadLocal<CachingSequenceDictionary> contigInfoPerThread =
|
||||
new ThreadLocal<CachingSequenceDictionary>();
|
||||
private final ThreadLocal<MRUCachingSAMSequenceDictionary> contigInfoPerThread =
|
||||
new ThreadLocal<MRUCachingSAMSequenceDictionary>() {
|
||||
@Override
|
||||
protected MRUCachingSAMSequenceDictionary initialValue() {
|
||||
return new MRUCachingSAMSequenceDictionary(SINGLE_MASTER_SEQUENCE_DICTIONARY);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* How much validation are we doing at runtime with this GenomeLocParser?
|
||||
*/
|
||||
private final ValidationLevel validationLevel;
|
||||
|
||||
/**
|
||||
* @return a caching sequence dictionary appropriate for this thread
|
||||
*/
|
||||
private CachingSequenceDictionary getContigInfo() {
|
||||
if ( contigInfoPerThread.get() == null ) {
|
||||
// initialize for this thread
|
||||
contigInfoPerThread.set(new CachingSequenceDictionary(SINGLE_MASTER_SEQUENCE_DICTIONARY));
|
||||
}
|
||||
|
||||
assert contigInfoPerThread.get() != null;
|
||||
|
||||
private MRUCachingSAMSequenceDictionary getContigInfo() {
|
||||
return contigInfoPerThread.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper class that provides efficient last used caching for the global
|
||||
* SAMSequenceDictionary underlying all of the GATK engine capabilities.
|
||||
*/
|
||||
private final class CachingSequenceDictionary {
|
||||
final private SAMSequenceDictionary dict;
|
||||
|
||||
// cache
|
||||
SAMSequenceRecord lastSSR = null;
|
||||
String lastContig = "";
|
||||
int lastIndex = -1;
|
||||
|
||||
@Requires({"dict != null", "dict.size() > 0"})
|
||||
public CachingSequenceDictionary(SAMSequenceDictionary dict) {
|
||||
this.dict = dict;
|
||||
}
|
||||
|
||||
@Ensures("result > 0")
|
||||
public final int getNSequences() {
|
||||
return dict.size();
|
||||
}
|
||||
|
||||
@Requires("contig != null")
|
||||
public final synchronized boolean hasContig(final String contig) {
|
||||
return contig.equals(lastContig) || dict.getSequence(contig) != null;
|
||||
}
|
||||
|
||||
@Requires("index >= 0")
|
||||
public final synchronized boolean hasContig(final int index) {
|
||||
return lastIndex == index || dict.getSequence(index) != null;
|
||||
}
|
||||
|
||||
@Requires("contig != null")
|
||||
@Ensures("result != null")
|
||||
public synchronized final SAMSequenceRecord getSequence(final String contig) {
|
||||
if ( isCached(contig) )
|
||||
return lastSSR;
|
||||
else
|
||||
return updateCache(contig, -1);
|
||||
}
|
||||
|
||||
@Requires("index >= 0")
|
||||
@Ensures("result != null")
|
||||
public synchronized final SAMSequenceRecord getSequence(final int index) {
|
||||
if ( isCached(index) )
|
||||
return lastSSR;
|
||||
else
|
||||
return updateCache(null, index);
|
||||
}
|
||||
|
||||
@Requires("contig != null")
|
||||
@Ensures("result >= 0")
|
||||
public synchronized final int getSequenceIndex(final String contig) {
|
||||
if ( ! isCached(contig) ) {
|
||||
updateCache(contig, -1);
|
||||
}
|
||||
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
@Requires({"contig != null", "lastContig != null"})
|
||||
private synchronized boolean isCached(final String contig) {
|
||||
return lastContig.equals(contig);
|
||||
}
|
||||
|
||||
@Requires({"lastIndex != -1", "index >= 0"})
|
||||
private synchronized boolean isCached(final int index) {
|
||||
return lastIndex == index;
|
||||
}
|
||||
|
||||
/**
|
||||
* The key algorithm. Given a new record, update the last used record, contig
|
||||
* name, and index.
|
||||
*
|
||||
* @param contig
|
||||
* @param index
|
||||
* @return
|
||||
*/
|
||||
@Requires("contig != null || index >= 0")
|
||||
@Ensures("result != null")
|
||||
private synchronized SAMSequenceRecord updateCache(final String contig, int index ) {
|
||||
SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig);
|
||||
if ( rec == null ) {
|
||||
throw new ReviewedStingException("BUG: requested unknown contig=" + contig + " index=" + index);
|
||||
} else {
|
||||
lastSSR = rec;
|
||||
lastContig = rec.getSequenceName();
|
||||
lastIndex = rec.getSequenceIndex();
|
||||
return rec;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* set our internal reference contig order
|
||||
* @param refFile the reference file
|
||||
|
|
@ -181,16 +97,34 @@ public final class GenomeLocParser {
|
|||
this(refFile.getSequenceDictionary());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new GenomeLocParser based on seqDictionary with the standard validation level
|
||||
* @param seqDict a non-null sequence dictionary
|
||||
*/
|
||||
public GenomeLocParser(SAMSequenceDictionary seqDict) {
|
||||
this(seqDict, ValidationLevel.STANDARD);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a genome loc parser based on seqDict with the specified level of validation
|
||||
* @param seqDict the sequence dictionary to use when creating genome locs
|
||||
* @param validationLevel how much validation should we do of the genome locs at runtime? Purely for testing purposes
|
||||
*/
|
||||
protected GenomeLocParser(SAMSequenceDictionary seqDict, final ValidationLevel validationLevel) {
|
||||
if (validationLevel == null)
|
||||
throw new IllegalArgumentException("validation level cannot be null");
|
||||
if (seqDict == null) { // we couldn't load the reference dictionary
|
||||
//logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
|
||||
throw new UserException.CommandLineException("Failed to load reference dictionary");
|
||||
}
|
||||
|
||||
SINGLE_MASTER_SEQUENCE_DICTIONARY = seqDict;
|
||||
logger.debug(String.format("Prepared reference sequence contig dictionary"));
|
||||
for (SAMSequenceRecord contig : seqDict.getSequences()) {
|
||||
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
|
||||
this.validationLevel = validationLevel;
|
||||
this.SINGLE_MASTER_SEQUENCE_DICTIONARY = seqDict;
|
||||
if ( logger.isDebugEnabled() ) {
|
||||
logger.debug(String.format("Prepared reference sequence contig dictionary"));
|
||||
for (SAMSequenceRecord contig : seqDict.getSequences()) {
|
||||
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -198,17 +132,13 @@ public final class GenomeLocParser {
|
|||
* Determines whether the given contig is valid with respect to the sequence dictionary
|
||||
* already installed in the GenomeLoc.
|
||||
*
|
||||
* @param contig a potentially null string name for the contig
|
||||
* @return True if the contig is valid. False otherwise.
|
||||
*/
|
||||
public final boolean contigIsInDictionary(String contig) {
|
||||
public final boolean contigIsInDictionary(final String contig) {
|
||||
return contig != null && getContigInfo().hasContig(contig);
|
||||
}
|
||||
|
||||
public final boolean indexIsInDictionary(final int index) {
|
||||
return index >= 0 && getContigInfo().hasContig(index);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the contig's SAMSequenceRecord
|
||||
*
|
||||
|
|
@ -249,7 +179,7 @@ public final class GenomeLocParser {
|
|||
* @return
|
||||
*/
|
||||
public final SAMSequenceDictionary getContigs() {
|
||||
return getContigInfo().dict;
|
||||
return getContigInfo().getDictionary();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -257,14 +187,13 @@ public final class GenomeLocParser {
|
|||
// Low-level creation functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* create a genome loc, given the contig name, start, and stop
|
||||
* @see #createGenomeLoc(String, int, int, int, boolean) for exact details of the creation.
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param start the starting position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return a new genome loc
|
||||
* Note that because this function doesn't take the contig index as an argument for contig, it
|
||||
* has a slight performance penalty over the version that does take the contig index. Does not
|
||||
* require the created genome loc on the reference genome
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop)"})
|
||||
|
|
@ -272,24 +201,61 @@ public final class GenomeLocParser {
|
|||
return createGenomeLoc(contig, getContigIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
public GenomeLoc createGenomeLoc(String contig, final int start, final int stop, boolean mustBeOnReference) {
|
||||
/**
|
||||
* @see #createGenomeLoc(String, int, int, int, boolean) for exact details of the creation.
|
||||
*
|
||||
* Note that because this function doesn't take the contig index as an argument for contig, it
|
||||
* has a slight performance penalty over the version that does take the contig index.
|
||||
*/
|
||||
public GenomeLoc createGenomeLoc(final String contig, final int start, final int stop, boolean mustBeOnReference) {
|
||||
return createGenomeLoc(contig, getContigIndex(contig), start, stop, mustBeOnReference);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #createGenomeLoc(String, int, int, int, boolean) for exact details of the creation.
|
||||
*
|
||||
* Doesn't require the start and stop to be on the genome
|
||||
*/
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop, false)"})
|
||||
public GenomeLoc createGenomeLoc(String contig, int index, final int start, final int stop) {
|
||||
return createGenomeLoc(contig, index, start, stop, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a GenomeLoc on contig, starting at start and ending (inclusive) at stop.
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param index the index into the GATK's SAMSequencingDictionary of contig (passed for efficiency to avoid the lookup)
|
||||
* @param start the starting position
|
||||
* @param stop the stop position of this loc, inclusive
|
||||
* @param mustBeOnReference if true, this factory will throw a UserException.MalformedGenomeLoc if start or stop isn't on the contig
|
||||
*
|
||||
* @return a non-null GenomeLoc
|
||||
*/
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop,mustBeOnReference)"})
|
||||
public GenomeLoc createGenomeLoc(String contig, int index, final int start, final int stop, boolean mustBeOnReference) {
|
||||
validateGenomeLoc(contig, index, start, stop, mustBeOnReference, true);
|
||||
return new GenomeLoc(contig, index, start, stop);
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc createGenomeLoc(final String contig, int index, final int start, final int stop, boolean mustBeOnReference) {
|
||||
// optimization: by interning the string we ensure that future comparisons use == not the full string comp
|
||||
final String interned = validateGenomeLoc(contig, index, start, stop, mustBeOnReference);
|
||||
return new GenomeLoc(interned, index, start, stop);
|
||||
}
|
||||
|
||||
public GenomeLoc createGenomeLocOnContig(final String contig, final int start, final int stop) {
|
||||
GenomeLoc contigLoc = createOverEntireContig(contig);
|
||||
return new GenomeLoc(contig, getContigIndex(contig), start, stop).intersect(contigLoc);
|
||||
/**
|
||||
* Create a new GenomeLoc, on contig, including the single position pos.
|
||||
*
|
||||
* Pos is not required to be on the reference
|
||||
*
|
||||
* @see #createGenomeLoc(String, int, int, int, boolean) for exact details of the creation.
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param pos the start and stop of the created genome loc
|
||||
*
|
||||
* @return a genome loc representing a single base at the specified postion on the contig
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, pos, pos, true)"})
|
||||
public GenomeLoc createGenomeLoc(final String contig, final int pos) {
|
||||
return createGenomeLoc(contig, getContigIndex(contig), pos, pos);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -306,50 +272,62 @@ public final class GenomeLocParser {
|
|||
* @param start the start position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return true if it's valid, false otherwise. If exceptOnError, then throws a UserException if invalid
|
||||
* @return the interned contig name, an optimization that ensures that contig == the string in the sequence dictionary
|
||||
*/
|
||||
private boolean validateGenomeLoc(String contig, int contigIndex, int start, int stop, boolean mustBeOnReference, boolean exceptOnError) {
|
||||
if ( ! getContigInfo().hasContig(contig) )
|
||||
return vglHelper(exceptOnError, String.format("Unknown contig %s", contig));
|
||||
protected String validateGenomeLoc(final String contig, final int contigIndex, final int start, final int stop, final boolean mustBeOnReference) {
|
||||
if ( validationLevel == ValidationLevel.NONE )
|
||||
return contig;
|
||||
else {
|
||||
if (stop < start)
|
||||
vglHelper(String.format("The stop position %d is less than start %d in contig %s", stop, start, contig));
|
||||
|
||||
if (stop < start)
|
||||
return vglHelper(exceptOnError, String.format("The stop position %d is less than start %d in contig %s", stop, start, contig));
|
||||
final SAMSequenceRecord contigInfo = getContigInfo().getSequence(contig);
|
||||
if ( contigInfo.getSequenceIndex() != contigIndex )
|
||||
vglHelper(String.format("The contig index %d is bad, doesn't equal the contig index %d of the contig from a string %s",
|
||||
contigIndex, contigInfo.getSequenceIndex(), contig));
|
||||
|
||||
if (contigIndex < 0)
|
||||
return vglHelper(exceptOnError, String.format("The contig index %d is less than 0", contigIndex));
|
||||
if ( mustBeOnReference ) {
|
||||
if (start < 1)
|
||||
vglHelper(String.format("The start position %d is less than 1", start));
|
||||
|
||||
if (contigIndex >= getContigInfo().getNSequences())
|
||||
return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, getContigInfo().getNSequences()));
|
||||
if (stop < 1)
|
||||
vglHelper(String.format("The stop position %d is less than 1", stop));
|
||||
|
||||
if ( mustBeOnReference ) {
|
||||
if (start < 1)
|
||||
return vglHelper(exceptOnError, String.format("The start position %d is less than 1", start));
|
||||
final int contigSize = contigInfo.getSequenceLength();
|
||||
if (start > contigSize || stop > contigSize)
|
||||
vglHelper(String.format("The genome loc coordinates %d-%d exceed the contig size (%d)", start, stop, contigSize));
|
||||
}
|
||||
|
||||
if (stop < 1)
|
||||
return vglHelper(exceptOnError, String.format("The stop position %d is less than 1", stop));
|
||||
|
||||
int contigSize = getContigInfo().getSequence(contigIndex).getSequenceLength();
|
||||
if (start > contigSize || stop > contigSize)
|
||||
return vglHelper(exceptOnError, String.format("The genome loc coordinates %d-%d exceed the contig size (%d)", start, stop, contigSize));
|
||||
return contigInfo.getSequenceName();
|
||||
}
|
||||
|
||||
// we passed
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Would a genome loc created with the given parameters be valid w.r.t. the master sequence dictionary?
|
||||
* @param contig the contig we'd use
|
||||
* @param start the start position
|
||||
* @param stop the stop
|
||||
* @param mustBeOnReference should we require the resulting genome loc to be completely on the reference genome?
|
||||
* @return true if this would produce a valid genome loc, false otherwise
|
||||
*/
|
||||
public boolean isValidGenomeLoc(String contig, int start, int stop, boolean mustBeOnReference ) {
|
||||
return validateGenomeLoc(contig, getContigIndexWithoutException(contig), start, stop, mustBeOnReference, false);
|
||||
}
|
||||
|
||||
public boolean isValidGenomeLoc(String contig, int start, int stop ) {
|
||||
return validateGenomeLoc(contig, getContigIndexWithoutException(contig), start, stop, true, false);
|
||||
}
|
||||
|
||||
private boolean vglHelper(boolean exceptOnError, String msg) {
|
||||
if ( exceptOnError )
|
||||
throw new UserException.MalformedGenomeLoc("Parameters to GenomeLocParser are incorrect:" + msg);
|
||||
else
|
||||
try {
|
||||
validateGenomeLoc(contig, getContigIndexWithoutException(contig), start, stop, mustBeOnReference);
|
||||
return true;
|
||||
} catch ( ReviewedStingException e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #isValidGenomeLoc(String, int, int) with mustBeOnReference == true
|
||||
*/
|
||||
public boolean isValidGenomeLoc(String contig, int start, int stop ) {
|
||||
return isValidGenomeLoc(contig, start, stop, true);
|
||||
}
|
||||
|
||||
private void vglHelper(final String msg) {
|
||||
throw new UserException.MalformedGenomeLoc("Parameters to GenomeLocParser are incorrect:" + msg);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -421,7 +399,7 @@ public final class GenomeLocParser {
|
|||
*/
|
||||
@Requires("pos != null")
|
||||
@Ensures("result >= 0")
|
||||
private int parsePosition(final String pos) {
|
||||
protected int parsePosition(final String pos) {
|
||||
if(pos.indexOf('-') != -1) {
|
||||
throw new NumberFormatException("Position: '" + pos + "' can't contain '-'." );
|
||||
}
|
||||
|
|
@ -482,89 +460,34 @@ public final class GenomeLocParser {
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates a GenomeLoc corresponding to the variant context vc. If includeSymbolicEndIfPossible
|
||||
* is true, and VC is a symbolic allele the end of the created genome loc will be the value
|
||||
* of the END info field key, if it exists, or vc.getEnd() if not.
|
||||
*
|
||||
* @param vc
|
||||
* @param includeSymbolicEndIfPossible
|
||||
* @return
|
||||
* @see GenomeLoc.setStart
|
||||
*/
|
||||
public GenomeLoc createGenomeLoc(final VariantContext vc, boolean includeSymbolicEndIfPossible) {
|
||||
if ( includeSymbolicEndIfPossible && vc.isSymbolic() ) {
|
||||
int end = vc.getAttributeAsInt(VCFConstants.END_KEY, vc.getEnd());
|
||||
return createGenomeLoc(vc.getChr(), vc.getStart(), end);
|
||||
}
|
||||
else
|
||||
return createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd());
|
||||
}
|
||||
|
||||
public GenomeLoc createGenomeLoc(final VariantContext vc) {
|
||||
return createGenomeLoc(vc, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc, given the contig name, and a single position. Must be on the reference
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param pos the postion
|
||||
*
|
||||
* @return a genome loc representing a single base at the specified postion on the contig
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, pos, pos, true)"})
|
||||
public GenomeLoc createGenomeLoc(final String contig, final int pos) {
|
||||
return createGenomeLoc(contig, getContigIndex(contig), pos, pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc from an existing loc, with a new start position
|
||||
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
||||
* set the start of a new GenomeLoc pertaining to a read that goes off the end of the contig.
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param start a new start position
|
||||
*
|
||||
* @return the newly created genome loc
|
||||
*/
|
||||
public GenomeLoc setStart(GenomeLoc loc, int start) {
|
||||
@Deprecated
|
||||
public GenomeLoc setStart(final GenomeLoc loc, final int start) {
|
||||
return createGenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop());
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc from an existing loc, with a new stop position
|
||||
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
||||
* set the stop of a new GenomeLoc pertaining to a read that goes off the end of the contig.
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param stop a new stop position
|
||||
*
|
||||
* @return
|
||||
* @see GenomeLoc.setStop
|
||||
*/
|
||||
public GenomeLoc setStop(GenomeLoc loc, int stop) {
|
||||
@Deprecated
|
||||
public GenomeLoc setStop(final GenomeLoc loc, final int stop) {
|
||||
return createGenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* return a new genome loc, with an incremented position
|
||||
*
|
||||
* @param loc the old location
|
||||
*
|
||||
* @return a new genome loc
|
||||
* @see GenomeLoc.incPos
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc) {
|
||||
@Deprecated
|
||||
public GenomeLoc incPos(final GenomeLoc loc) {
|
||||
return incPos(loc, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* return a new genome loc, with an incremented position
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param by how much to move the start and stop by
|
||||
*
|
||||
* @return a new genome loc
|
||||
* @see GenomeLoc.incPos
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc, int by) {
|
||||
@Deprecated
|
||||
public GenomeLoc incPos(final GenomeLoc loc, final int by) {
|
||||
return createGenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by);
|
||||
}
|
||||
|
||||
|
|
@ -575,7 +498,7 @@ public final class GenomeLocParser {
|
|||
*/
|
||||
@Requires("contigName != null")
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc createOverEntireContig(String contigName) {
|
||||
public GenomeLoc createOverEntireContig(final String contigName) {
|
||||
SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
|
||||
return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true);
|
||||
}
|
||||
|
|
@ -587,12 +510,12 @@ public final class GenomeLocParser {
|
|||
* @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the start of the contig.
|
||||
*/
|
||||
@Requires({"loc != null", "maxBasePairs > 0"})
|
||||
public GenomeLoc createGenomeLocAtStart(GenomeLoc loc, int maxBasePairs) {
|
||||
public GenomeLoc createGenomeLocAtStart(final GenomeLoc loc, final int maxBasePairs) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
return null;
|
||||
String contigName = loc.getContig();
|
||||
SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
|
||||
int contigIndex = contig.getSequenceIndex();
|
||||
final String contigName = loc.getContig();
|
||||
final SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
|
||||
final int contigIndex = contig.getSequenceIndex();
|
||||
|
||||
int start = loc.getStart() - maxBasePairs;
|
||||
int stop = loc.getStart() - 1;
|
||||
|
|
@ -611,19 +534,12 @@ public final class GenomeLocParser {
|
|||
* @param padding The number of base pairs to pad on either end
|
||||
* @return The contiguous loc of length up to the original length + 2*padding (depending on the start/end of the contig).
|
||||
*/
|
||||
@Requires({"loc != null", "padding > 0"})
|
||||
@Requires({"loc != null", "padding >= 0"})
|
||||
public GenomeLoc createPaddedGenomeLoc(final GenomeLoc loc, final int padding) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
if (GenomeLoc.isUnmapped(loc) || padding == 0)
|
||||
return loc;
|
||||
final String contigName = loc.getContig();
|
||||
final SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
|
||||
final int contigIndex = contig.getSequenceIndex();
|
||||
final int contigLength = contig.getSequenceLength();
|
||||
|
||||
final int start = Math.max(1, loc.getStart() - padding);
|
||||
final int stop = Math.min(contigLength, loc.getStop() + padding);
|
||||
|
||||
return createGenomeLoc(contigName, contigIndex, start, stop, true);
|
||||
else
|
||||
return createGenomeLocOnContig(loc.getContig(), loc.getContigIndex(), loc.getStart() - padding, loc.getStop() + padding);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -633,7 +549,7 @@ public final class GenomeLocParser {
|
|||
* @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the end of the contig.
|
||||
*/
|
||||
@Requires({"loc != null", "maxBasePairs > 0"})
|
||||
public GenomeLoc createGenomeLocAtStop(GenomeLoc loc, int maxBasePairs) {
|
||||
public GenomeLoc createGenomeLocAtStop(final GenomeLoc loc, final int maxBasePairs) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
return null;
|
||||
String contigName = loc.getContig();
|
||||
|
|
@ -651,4 +567,35 @@ public final class GenomeLocParser {
|
|||
|
||||
return createGenomeLoc(contigName, contigIndex, start, stop, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #createGenomeLocOnContig(String, int, int, int) with the contig index looked up from contig
|
||||
*/
|
||||
public GenomeLoc createGenomeLocOnContig(final String contig, final int start, final int stop) {
|
||||
return createGenomeLocOnContig(contig, getContigIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new genome loc, bounding start and stop by the start and end of contig
|
||||
*
|
||||
* This function will return null if start and stop cannot be adjusted in any reasonable way
|
||||
* to be on the contig. For example, if start and stop are both past the end of the contig,
|
||||
* there's no way to fix this, and null will be returned.
|
||||
*
|
||||
* @param contig our contig
|
||||
* @param start our start as an arbitrary integer (may be negative, etc)
|
||||
* @param stop our stop as an arbitrary integer (may be negative, etc)
|
||||
* @return a valid genome loc over contig, or null if a meaningful genome loc cannot be created
|
||||
*/
|
||||
public GenomeLoc createGenomeLocOnContig(final String contig, final int contigIndex, final int start, final int stop) {
|
||||
final int contigLength = getContigInfo().getSequence(contigIndex).getSequenceLength();
|
||||
final int boundedStart = Math.max(1, start);
|
||||
final int boundedStop = Math.min(contigLength, stop);
|
||||
|
||||
if ( boundedStart > contigLength || boundedStop < 1 )
|
||||
// there's no meaningful way to create this genome loc, as the start and stop are off the contig
|
||||
return null;
|
||||
else
|
||||
return createGenomeLoc(contig, contigIndex, boundedStart, boundedStop);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,186 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
/**
|
||||
* A wrapper class that provides efficient most recently used caching for the global
|
||||
* SAMSequenceDictionary underlying all of the GATK engine capabilities. It is essential
|
||||
* that these class be as efficient as possible. It doesn't need to be thread-safe, as
|
||||
* GenomeLocParser uses a thread-local variable to ensure that each thread gets its own MRU
|
||||
* cache.
|
||||
*
|
||||
* The MRU elements are the SAMSequenceRecord, the lastContig, and the lastIndex. The
|
||||
* cached value is the actual SAMSequenceRecord of the most recently accessed value from
|
||||
* getSequence, along with local variables for the contig index and contig string.
|
||||
*/
|
||||
final class MRUCachingSAMSequenceDictionary {
|
||||
/**
|
||||
* Our sequence dictionary
|
||||
*/
|
||||
private final SAMSequenceDictionary dict;
|
||||
|
||||
SAMSequenceRecord lastSSR = null;
|
||||
String lastContig = "";
|
||||
int lastIndex = -1;
|
||||
|
||||
/**
|
||||
* Create a new MRUCachingSAMSequenceDictionary that provides information about sequences in dict
|
||||
* @param dict a non-null, non-empty sequencing dictionary
|
||||
*/
|
||||
@Ensures("lastSSR == null")
|
||||
public MRUCachingSAMSequenceDictionary(final SAMSequenceDictionary dict) {
|
||||
if ( dict == null ) throw new IllegalArgumentException("Dictionary cannot be null");
|
||||
if ( dict.size() == 0 ) throw new IllegalArgumentException("Dictionary cannot have size zero");
|
||||
|
||||
this.dict = dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get our sequence dictionary
|
||||
* @return a non-null SAMSequenceDictionary
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public SAMSequenceDictionary getDictionary() {
|
||||
return dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is contig present in the dictionary? Efficiently caching.
|
||||
* @param contig a non-null contig we want to test
|
||||
* @return true if contig is in dictionary, false otherwise
|
||||
*/
|
||||
@Requires("contig != null")
|
||||
public final boolean hasContig(final String contig) {
|
||||
return contig.equals(lastContig) || dict.getSequence(contig) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is contig index present in the dictionary? Efficiently caching.
|
||||
* @param contigIndex an integer offset that might map to a contig in this dictionary
|
||||
* @return true if contigIndex is in dictionary, false otherwise
|
||||
*/
|
||||
@Requires("contigIndex >= 0")
|
||||
public final boolean hasContigIndex(final int contigIndex) {
|
||||
return lastIndex == contigIndex || dict.getSequence(contigIndex) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as SAMSequenceDictionary.getSequence but uses a MRU cache for efficiency
|
||||
*
|
||||
* @param contig the contig name we want to get the sequence record of
|
||||
* @throws ReviewedStingException if contig isn't present in the dictionary
|
||||
* @return the sequence record for contig
|
||||
*/
|
||||
@Requires("contig != null")
|
||||
@Ensures("result != null")
|
||||
public final SAMSequenceRecord getSequence(final String contig) {
|
||||
if ( isCached(contig) )
|
||||
return lastSSR;
|
||||
else
|
||||
return updateCache(contig, -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as SAMSequenceDictionary.getSequence but uses a MRU cache for efficiency
|
||||
*
|
||||
* @param index the contig index we want to get the sequence record of
|
||||
* @throws ReviewedStingException if contig isn't present in the dictionary
|
||||
* @return the sequence record for contig
|
||||
*/
|
||||
@Requires("index >= 0")
|
||||
@Ensures("result != null")
|
||||
public final SAMSequenceRecord getSequence(final int index) {
|
||||
if ( isCached(index) )
|
||||
return lastSSR;
|
||||
else
|
||||
return updateCache(null, index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as SAMSequenceDictionary.getSequenceIndex but uses a MRU cache for efficiency
|
||||
*
|
||||
* @param contig the contig we want to get the sequence record of
|
||||
* @throws ReviewedStingException if index isn't present in the dictionary
|
||||
* @return the sequence record index for contig
|
||||
*/
|
||||
@Requires("contig != null")
|
||||
@Ensures("result >= 0")
|
||||
public final int getSequenceIndex(final String contig) {
|
||||
if ( ! isCached(contig) ) {
|
||||
updateCache(contig, -1);
|
||||
}
|
||||
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is contig the MRU cached contig?
|
||||
* @param contig the contig to test
|
||||
* @return true if contig is the currently cached contig, false otherwise
|
||||
*/
|
||||
@Requires({"contig != null"})
|
||||
protected boolean isCached(final String contig) {
|
||||
return contig.equals(lastContig);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the contig index index the MRU cached index?
|
||||
* @param index the contig index to test
|
||||
* @return true if contig index is the currently cached contig index, false otherwise
|
||||
*/
|
||||
protected boolean isCached(final int index) {
|
||||
return lastIndex == index;
|
||||
}
|
||||
|
||||
/**
|
||||
* The key algorithm. Given a new record, update the last used record, contig
|
||||
* name, and index.
|
||||
*
|
||||
* @param contig the contig we want to look up. If null, index is used instead
|
||||
* @param index the contig index we want to look up. Only used if contig is null
|
||||
* @throws ReviewedStingException if index isn't present in the dictionary
|
||||
* @return the SAMSequenceRecord for contig / index
|
||||
*/
|
||||
@Requires("contig != null || index >= 0")
|
||||
@Ensures("result != null")
|
||||
private SAMSequenceRecord updateCache(final String contig, int index ) {
|
||||
SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig);
|
||||
if ( rec == null ) {
|
||||
throw new ReviewedStingException("BUG: requested unknown contig=" + contig + " index=" + index);
|
||||
} else {
|
||||
lastSSR = rec;
|
||||
lastContig = rec.getSequenceName();
|
||||
lastIndex = rec.getSequenceIndex();
|
||||
return rec;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -33,7 +33,7 @@ import org.apache.commons.math.MathException;
|
|||
import org.apache.commons.math.distribution.NormalDistribution;
|
||||
import org.apache.commons.math.distribution.NormalDistributionImpl;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
|
|
|||
|
|
@ -1,34 +1,34 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -29,11 +29,11 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
@ -117,15 +117,15 @@ public class SampleUtils {
|
|||
}
|
||||
|
||||
public static Set<String> getSampleList(Map<String, VCFHeader> headers) {
|
||||
return getSampleList(headers, VariantContextUtils.GenotypeMergeType.PRIORITIZE);
|
||||
return getSampleList(headers, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE);
|
||||
}
|
||||
|
||||
public static Set<String> getSampleList(Map<String, VCFHeader> headers, VariantContextUtils.GenotypeMergeType mergeOption) {
|
||||
public static Set<String> getSampleList(Map<String, VCFHeader> headers, GATKVariantContextUtils.GenotypeMergeType mergeOption) {
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
for ( Map.Entry<String, VCFHeader> val : headers.entrySet() ) {
|
||||
VCFHeader header = val.getValue();
|
||||
for ( String sample : header.getGenotypeSamples() ) {
|
||||
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == VariantContextUtils.GenotypeMergeType.UNIQUIFY));
|
||||
samples.add(GATKVariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == GATKVariantContextUtils.GenotypeMergeType.UNIQUIFY));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,37 +25,105 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* Represents a single active region created by the Active Region Traversal for processing
|
||||
*
|
||||
* An active region is a single contiguous span of bases on the genome that should be operated
|
||||
* on as a single unit for the active region traversal. The action may contains a list of
|
||||
* reads that overlap the region (may because there may be no reads in the region). The region
|
||||
* is tagged as being either active or inactive, depending on the probabilities provided by
|
||||
* the isActiveProb results from the ART walker. Each region carries with it the
|
||||
* exact span of the region (bases which are the core of the isActiveProbs from the walker) as
|
||||
* well as an extended size, that includes the ART walker's extension size. Reads in the region
|
||||
* provided by ART include all reads overlapping the extended span, not the raw span.
|
||||
*
|
||||
* User: rpoplin
|
||||
* Date: 1/4/12
|
||||
*/
|
||||
|
||||
@Invariant({
|
||||
"extension >= 0",
|
||||
"activeRegionLoc != null",
|
||||
"genomeLocParser != null",
|
||||
"spanIncludingReads != null",
|
||||
"extendedLoc != null"
|
||||
})
|
||||
public class ActiveRegion implements HasGenomeLocation {
|
||||
|
||||
/**
|
||||
* The reads included in this active region. May be empty upon creation, and expand / contract
|
||||
* as reads are added or removed from this region.
|
||||
*/
|
||||
private final ArrayList<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
|
||||
private final List<ActivityProfileState> supportingStates;
|
||||
private final GenomeLoc activeRegionLoc;
|
||||
private final GenomeLoc extendedLoc;
|
||||
private final int extension;
|
||||
private GenomeLoc fullExtentReferenceLoc = null;
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
public final boolean isActive;
|
||||
|
||||
/**
|
||||
* An ordered list (by genomic coordinate) of the ActivityProfileStates that went
|
||||
* into this active region. May be empty, which says that no supporting states were
|
||||
* provided when this region was created.
|
||||
*/
|
||||
private final List<ActivityProfileState> supportingStates;
|
||||
|
||||
/**
|
||||
* The raw span of this active region, not including the active region extension
|
||||
*/
|
||||
private final GenomeLoc activeRegionLoc;
|
||||
|
||||
/**
|
||||
* The span of this active region on the genome, including the active region extension
|
||||
*/
|
||||
private final GenomeLoc extendedLoc;
|
||||
|
||||
/**
|
||||
* The extension, in bp, of this active region.
|
||||
*/
|
||||
private final int extension;
|
||||
|
||||
/**
|
||||
* A genomeLocParser so we can create genomeLocs
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* Does this region represent an active region (all isActiveProbs above threshold) or
|
||||
* an inactive region (all isActiveProbs below threshold)?
|
||||
*/
|
||||
private final boolean isActive;
|
||||
|
||||
/**
|
||||
* The span of this active region, including the bp covered by all reads in this
|
||||
* region. This union of extensionLoc and the loc of all reads in this region.
|
||||
*
|
||||
* Must be at least as large as extendedLoc, but may be larger when reads
|
||||
* partially overlap this region.
|
||||
*/
|
||||
private GenomeLoc spanIncludingReads;
|
||||
|
||||
/**
|
||||
* Create a new ActiveRegion containing no reads
|
||||
*
|
||||
* @param activeRegionLoc the span of this active region
|
||||
* @param supportingStates the states that went into creating this region, or null / empty if none are available.
|
||||
* If not empty, must have exactly one state for each bp in activeRegionLoc
|
||||
* @param isActive indicates whether this is an active region, or an inactve one
|
||||
* @param genomeLocParser a non-null parser to let us create new genome locs
|
||||
* @param extension the active region extension to use for this active region
|
||||
*/
|
||||
public ActiveRegion( final GenomeLoc activeRegionLoc, final List<ActivityProfileState> supportingStates, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) {
|
||||
if ( activeRegionLoc == null ) throw new IllegalArgumentException("activeRegionLoc cannot be null");
|
||||
if ( activeRegionLoc.size() == 0 ) throw new IllegalArgumentException("Active region cannot be of zero size, but got " + activeRegionLoc);
|
||||
if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null");
|
||||
if ( extension < 0 ) throw new IllegalArgumentException("extension cannot be < 0 but got " + extension);
|
||||
|
||||
|
|
@ -64,75 +132,254 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
this.isActive = isActive;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.extension = extension;
|
||||
extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension);
|
||||
fullExtentReferenceLoc = extendedLoc;
|
||||
this.extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension);
|
||||
this.spanIncludingReads = extendedLoc;
|
||||
|
||||
if ( ! this.supportingStates.isEmpty() ) {
|
||||
if ( this.supportingStates.size() != activeRegionLoc.size() )
|
||||
throw new IllegalArgumentException("Supporting states wasn't empty but it doesn't have exactly one state per bp in the active region: states " + this.supportingStates.size() + " vs. bp in region = " + activeRegionLoc.size());
|
||||
GenomeLoc lastStateLoc = null;
|
||||
for ( final ActivityProfileState state : this.supportingStates ) {
|
||||
if ( lastStateLoc != null ) {
|
||||
if ( state.getLoc().getStart() != lastStateLoc.getStart() + 1 || state.getLoc().getContigIndex() != lastStateLoc.getContigIndex())
|
||||
throw new IllegalArgumentException("Supporting state has an invalid sequence: last state was " + lastStateLoc + " but next state was " + state);
|
||||
}
|
||||
lastStateLoc = state.getLoc();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ActiveRegion " + activeRegionLoc.toString() + " active?=" + isActive + " nReads=" + reads.size() + " ";
|
||||
return "ActiveRegion " + activeRegionLoc.toString() + " active?=" + isActive() + " nReads=" + reads.size() + " ";
|
||||
}
|
||||
|
||||
// add each read to the bin and extend the reference genome activeRegionLoc if needed
|
||||
public void add( final GATKSAMRecord read ) {
|
||||
fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
|
||||
reads.add( read );
|
||||
}
|
||||
|
||||
public void hardClipToActiveRegion() {
|
||||
final ArrayList<GATKSAMRecord> clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() );
|
||||
reads.clear();
|
||||
reads.addAll(clippedReads);
|
||||
}
|
||||
|
||||
public ArrayList<GATKSAMRecord> getReads() { return reads; }
|
||||
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader ) {
|
||||
/**
|
||||
* See #getActiveRegionReference but with padding == 0
|
||||
*/
|
||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
return getActiveRegionReference(referenceReader, 0);
|
||||
}
|
||||
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return getReference( referenceReader, padding, extendedLoc );
|
||||
/**
|
||||
* Get the reference bases from referenceReader spanned by the extended location of this active region,
|
||||
* including additional padding bp on either side. If this expanded region would exceed the boundaries
|
||||
* of the active region's contig, the returned result will be truncated to only include on-genome reference
|
||||
* bases
|
||||
* @param referenceReader the source of the reference genome bases
|
||||
* @param padding the padding, in BP, we want to add to either side of this active region extended region
|
||||
* @return a non-null array of bytes holding the reference bases in referenceReader
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return getReference(referenceReader, padding, extendedLoc);
|
||||
}
|
||||
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader ) {
|
||||
/**
|
||||
* See #getActiveRegionReference but using the span including regions not the extended span
|
||||
*/
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
return getFullReference(referenceReader, 0);
|
||||
}
|
||||
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return getReference( referenceReader, padding, fullExtentReferenceLoc );
|
||||
/**
|
||||
* See #getActiveRegionReference but using the span including regions not the extended span
|
||||
*/
|
||||
public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
|
||||
return getReference(referenceReader, padding, spanIncludingReads);
|
||||
}
|
||||
|
||||
@Requires("referenceReader.isUppercasingBases()")
|
||||
private byte[] getReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
||||
/**
|
||||
* Get the reference bases from referenceReader spanned by the extended location of this active region,
|
||||
* including additional padding bp on either side. If this expanded region would exceed the boundaries
|
||||
* of the active region's contig, the returned result will be truncated to only include on-genome reference
|
||||
* bases
|
||||
* @param referenceReader the source of the reference genome bases
|
||||
* @param padding the padding, in BP, we want to add to either side of this active region extended region
|
||||
* @param genomeLoc a non-null genome loc indicating the base span of the bp we'd like to get the reference for
|
||||
* @return a non-null array of bytes holding the reference bases in referenceReader
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
|
||||
if ( referenceReader == null ) throw new IllegalArgumentException("referenceReader cannot be null");
|
||||
if ( padding < 0 ) throw new IllegalArgumentException("padding must be a positive integer but got " + padding);
|
||||
if ( genomeLoc == null ) throw new IllegalArgumentException("genomeLoc cannot be null");
|
||||
if ( genomeLoc.size() == 0 ) throw new IllegalArgumentException("GenomeLoc must have size > 0 but got " + genomeLoc);
|
||||
|
||||
final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(),
|
||||
Math.max(1, genomeLoc.getStart() - padding),
|
||||
Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases();
|
||||
|
||||
return reference;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the raw span of this active region (excluding the extension)
|
||||
* @return a non-null genome loc
|
||||
*/
|
||||
@Override
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc getLocation() { return activeRegionLoc; }
|
||||
|
||||
/**
|
||||
* Get the span of this active region including the extension value
|
||||
* @return a non-null GenomeLoc
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc getExtendedLoc() { return extendedLoc; }
|
||||
public GenomeLoc getReferenceLoc() { return fullExtentReferenceLoc; }
|
||||
|
||||
public List<ActivityProfileState> getSupportingStates() { return supportingStates; }
|
||||
/**
|
||||
* Get the span of this active region including the extension and the projects on the
|
||||
* genome of all reads in this active region. That is, returns the bp covered by this
|
||||
* region and all reads in the region.
|
||||
* @return a non-null genome loc
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc getReadSpanLoc() { return spanIncludingReads; }
|
||||
|
||||
/**
|
||||
* Get the active profile states that went into creating this region, if possible
|
||||
* @return an unmodifiable list of states that led to the creation of this region, or an empty
|
||||
* list if none were provided
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public List<ActivityProfileState> getSupportingStates() {
|
||||
return Collections.unmodifiableList(supportingStates);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the active region extension applied to this region
|
||||
*
|
||||
* The extension is >= 0 bp in size, and indicates how much padding this art walker wanted for its regions
|
||||
*
|
||||
* @return the size in bp of the region extension
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int getExtension() { return extension; }
|
||||
public int size() { return reads.size(); }
|
||||
public void clearReads() { reads.clear(); }
|
||||
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
|
||||
|
||||
public boolean equalExceptReads(final ActiveRegion other) {
|
||||
/**
|
||||
* Get an unmodifiable list of reads currently in this active region.
|
||||
*
|
||||
* The reads are sorted by their coordinate position
|
||||
*
|
||||
* @return an unmodifiable list of reads in this active region
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public List<GATKSAMRecord> getReads() {
|
||||
return Collections.unmodifiableList(reads);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of reads currently in this active region
|
||||
* @return an integer >= 0
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int size() { return reads.size(); }
|
||||
|
||||
/**
|
||||
* Add read to this active region
|
||||
*
|
||||
* Read must have alignment start >= than the last read currently in this active region.
|
||||
*
|
||||
* @throws IllegalArgumentException if read doesn't overlap the extended region of this active region
|
||||
*
|
||||
* @param read a non-null GATKSAMRecord
|
||||
*/
|
||||
@Ensures("reads.size() == old(reads.size()) + 1")
|
||||
public void add( final GATKSAMRecord read ) {
|
||||
if ( read == null ) throw new IllegalArgumentException("Read cannot be null");
|
||||
|
||||
final GenomeLoc readLoc = genomeLocParser.createGenomeLoc( read );
|
||||
if ( ! readOverlapsRegion(read) )
|
||||
throw new IllegalArgumentException("Read location " + readLoc + " doesn't overlap with active region extended span " + extendedLoc);
|
||||
|
||||
spanIncludingReads = spanIncludingReads.union( readLoc );
|
||||
|
||||
if ( ! reads.isEmpty() ) {
|
||||
final GATKSAMRecord lastRead = reads.get(size() - 1);
|
||||
if ( ! lastRead.getReferenceIndex().equals(read.getReferenceIndex()) )
|
||||
throw new IllegalArgumentException("Attempting to add a read to ActiveRegion not on the same contig as other reads: lastRead " + lastRead + " attempting to add " + read);
|
||||
|
||||
if ( read.getAlignmentStart() < lastRead.getAlignmentStart() )
|
||||
throw new IllegalArgumentException("Attempting to add a read to ActiveRegion out of order w.r.t. other reads: lastRead " + lastRead + " at " + lastRead.getAlignmentStart() + " attempting to add " + read + " at " + read.getAlignmentStart());
|
||||
}
|
||||
|
||||
reads.add( read );
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if read would overlap the extended extent of this region
|
||||
* @param read the read we want to test
|
||||
* @return true if read can be added to this region, false otherwise
|
||||
*/
|
||||
public boolean readOverlapsRegion(final GATKSAMRecord read) {
|
||||
final GenomeLoc readLoc = genomeLocParser.createGenomeLoc( read );
|
||||
return readLoc.overlapsP(extendedLoc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add all reads to this active region
|
||||
* @param reads a collection of reads to add to this active region
|
||||
*/
|
||||
public void addAll(final Collection<GATKSAMRecord> reads) {
|
||||
if ( reads == null ) throw new IllegalArgumentException("reads cannot be null");
|
||||
for ( final GATKSAMRecord read : reads )
|
||||
add(read);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all of the reads currently in this active region
|
||||
*/
|
||||
@Ensures("size() == 0")
|
||||
public void clearReads() {
|
||||
spanIncludingReads = extendedLoc;
|
||||
reads.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove all of the reads in readsToRemove from this active region
|
||||
* @param readsToRemove the collection of reads we want to remove
|
||||
*/
|
||||
public void removeAll( final Collection<GATKSAMRecord> readsToRemove ) {
|
||||
reads.removeAll(readsToRemove);
|
||||
spanIncludingReads = extendedLoc;
|
||||
for ( final GATKSAMRecord read : reads ) {
|
||||
spanIncludingReads = spanIncludingReads.union( genomeLocParser.createGenomeLoc(read) );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Clips all of the reads in this active region so that none extend beyond the active region extended loc
|
||||
*
|
||||
* This function may change the getReadSpanLoc, as it updates the read span based on the new clipped
|
||||
* read coordinates.
|
||||
*/
|
||||
public void hardClipToActiveRegion() {
|
||||
final ArrayList<GATKSAMRecord> clippedReads = ReadClipper.hardClipToRegion( reads, extendedLoc.getStart(), extendedLoc.getStop() );
|
||||
ReadUtils.sortReadsByCoordinate(clippedReads);
|
||||
clearReads();
|
||||
addAll(clippedReads);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this region equal to other, excluding any reads in either region in the comparison
|
||||
* @param other the other active region we want to test
|
||||
* @return true if this region is equal, excluding any reads and derived values, to other
|
||||
*/
|
||||
protected boolean equalExceptReads(final ActiveRegion other) {
|
||||
if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false;
|
||||
if ( isActive != other.isActive ) return false;
|
||||
if ( isActive() != other.isActive()) return false;
|
||||
if ( genomeLocParser != other.genomeLocParser ) return false;
|
||||
if ( extension != other.extension ) return false;
|
||||
if ( extendedLoc.compareTo(other.extendedLoc) != 0 ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this region represent an active region (all isActiveProbs above threshold) or
|
||||
* an inactive region (all isActiveProbs below threshold)?
|
||||
*/
|
||||
public boolean isActive() {
|
||||
return isActive;
|
||||
}
|
||||
}
|
||||
|
|
@ -32,7 +32,7 @@ import net.sf.samtools.CigarOperator;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.variant.utils;
|
||||
package org.broadinstitute.sting.utils.collections;
|
||||
|
||||
|
||||
public class Pair<X,Y> {
|
||||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.variant.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
|||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
|
|
|||
|
|
@ -115,6 +115,19 @@ public class ArtificialSAMUtils {
|
|||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an artificial sam header based on the sequence dictionary dict
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static SAMFileHeader createArtificialSamHeader(final SAMSequenceDictionary dict) {
|
||||
SAMFileHeader header = new SAMFileHeader();
|
||||
header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
|
||||
header.setSequenceDictionary(dict);
|
||||
return header;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* setup a default read group for a SAMFileHeader
|
||||
*
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import org.broad.tribble.readers.PositionalBufferedStream;
|
|||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
||||
|
|
@ -147,4 +147,49 @@ public class GATKVCFUtils {
|
|||
return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary());
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealVariant(List<VariantContext> VCs, VariantContext.Type type) {
|
||||
if ( VCs == null )
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( vc.getType() == type ) {
|
||||
rsID = vc.getID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return rsID;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all of the VCF records from source into memory, returning the header and the VariantContexts
|
||||
*
|
||||
* SHOULD ONLY BE USED FOR UNIT/INTEGRATION TESTING PURPOSES!
|
||||
*
|
||||
* @param source the file to read, must be in VCF4 format
|
||||
* @return
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
|
||||
// read in the features
|
||||
final List<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
|
||||
|
||||
while ( ! pbs.isDone() ) {
|
||||
final VariantContext vc = codec.decode(pbs);
|
||||
if ( vc != null )
|
||||
vcs.add(vc);
|
||||
}
|
||||
|
||||
return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
|
||||
}
|
||||
}
|
||||
|
|
@ -25,12 +25,79 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variant;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
|
||||
public class GATKVariantContextUtils {
|
||||
|
||||
private static Logger logger = Logger.getLogger(GATKVariantContextUtils.class);
|
||||
|
||||
public static final int DEFAULT_PLOIDY = 2;
|
||||
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
||||
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||
public final static String MERGE_INTERSECTION = "Intersection";
|
||||
|
||||
public enum GenotypeMergeType {
|
||||
/**
|
||||
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
|
||||
*/
|
||||
UNIQUIFY,
|
||||
/**
|
||||
* Take genotypes in priority order (see the priority argument).
|
||||
*/
|
||||
PRIORITIZE,
|
||||
/**
|
||||
* Take the genotypes in any order.
|
||||
*/
|
||||
UNSORTED,
|
||||
/**
|
||||
* Require that all samples/genotypes be unique between all inputs.
|
||||
*/
|
||||
REQUIRE_UNIQUE
|
||||
}
|
||||
|
||||
public enum FilteredRecordMergeType {
|
||||
/**
|
||||
* Union - leaves the record if any record is unfiltered.
|
||||
*/
|
||||
KEEP_IF_ANY_UNFILTERED,
|
||||
/**
|
||||
* Requires all records present at site to be unfiltered. VCF files that don't contain the record don't influence this.
|
||||
*/
|
||||
KEEP_IF_ALL_UNFILTERED,
|
||||
/**
|
||||
* If any record is present at this site (regardless of possibly being filtered), then all such records are kept and the filters are reset.
|
||||
*/
|
||||
KEEP_UNCONDITIONAL
|
||||
}
|
||||
|
||||
public enum MultipleAllelesMergeType {
|
||||
/**
|
||||
* Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record.
|
||||
*/
|
||||
BY_TYPE,
|
||||
/**
|
||||
* Merge all allele types at the same start position into the same VCF record.
|
||||
*/
|
||||
MIX_TYPES
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genome location, given a variant context
|
||||
* @param genomeLocParser parser
|
||||
|
|
@ -41,4 +108,885 @@ public class GATKVariantContextUtils {
|
|||
return genomeLocParser.createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true iff VC is an non-complex indel where every allele represents an expansion or
|
||||
* contraction of a series of identical bases in the reference.
|
||||
*
|
||||
* For example, suppose the ref bases are CTCTCTGA, which includes a 3x repeat of CTCTCT
|
||||
*
|
||||
* If VC = -/CT, then this function returns true because the CT insertion matches exactly the
|
||||
* upcoming reference.
|
||||
* If VC = -/CTA then this function returns false because the CTA isn't a perfect match
|
||||
*
|
||||
* Now consider deletions:
|
||||
*
|
||||
* If VC = CT/- then again the same logic applies and this returns true
|
||||
* The case of CTA/- makes no sense because it doesn't actually match the reference bases.
|
||||
*
|
||||
* The logic of this function is pretty simple. Take all of the non-null alleles in VC. For
|
||||
* each insertion allele of n bases, check if that allele matches the next n reference bases.
|
||||
* For each deletion allele of n bases, check if this matches the reference bases at n - 2 n,
|
||||
* as it must necessarily match the first n bases. If this test returns true for all
|
||||
* alleles you are a tandem repeat, otherwise you are not.
|
||||
*
|
||||
* @param vc
|
||||
* @param refBasesStartingAtVCWithPad not this is assumed to include the PADDED reference
|
||||
* @return
|
||||
*/
|
||||
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
|
||||
public static boolean isTandemRepeat(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
|
||||
final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
|
||||
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
||||
return false;
|
||||
|
||||
final Allele ref = vc.getReference();
|
||||
|
||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||
if ( ! isRepeatAllele(ref, allele, refBasesStartingAtVCWithoutPad) )
|
||||
return false;
|
||||
}
|
||||
|
||||
// we've passed all of the tests, so we are a repeat
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param vc
|
||||
* @param refBasesStartingAtVCWithPad
|
||||
* @return
|
||||
*/
|
||||
@Requires({"vc != null", "refBasesStartingAtVCWithPad != null && refBasesStartingAtVCWithPad.length > 0"})
|
||||
public static Pair<List<Integer>,byte[]> getNumTandemRepeatUnits(final VariantContext vc, final byte[] refBasesStartingAtVCWithPad) {
|
||||
final boolean VERBOSE = false;
|
||||
final String refBasesStartingAtVCWithoutPad = new String(refBasesStartingAtVCWithPad).substring(1);
|
||||
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
||||
return null;
|
||||
|
||||
final Allele refAllele = vc.getReference();
|
||||
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
|
||||
|
||||
byte[] repeatUnit = null;
|
||||
final ArrayList<Integer> lengths = new ArrayList<Integer>();
|
||||
|
||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
|
||||
|
||||
final int[] repetitionCount = result.first;
|
||||
// repetition count = 0 means allele is not a tandem expansion of context
|
||||
if (repetitionCount[0] == 0 || repetitionCount[1] == 0)
|
||||
return null;
|
||||
|
||||
if (lengths.size() == 0) {
|
||||
lengths.add(repetitionCount[0]); // add ref allele length only once
|
||||
}
|
||||
lengths.add(repetitionCount[1]); // add this alt allele's length
|
||||
|
||||
repeatUnit = result.second;
|
||||
if (VERBOSE) {
|
||||
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
|
||||
System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
|
||||
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
|
||||
System.out.println("RU:"+new String(repeatUnit));
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<List<Integer>, byte[]>(lengths,repeatUnit);
|
||||
}
|
||||
|
||||
public static Pair<int[],byte[]> getNumTandemRepeatUnits(final byte[] refBases, final byte[] altBases, final byte[] remainingRefContext) {
|
||||
/* we can't exactly apply same logic as in basesAreRepeated() to compute tandem unit and number of repeated units.
|
||||
Consider case where ref =ATATAT and we have an insertion of ATAT. Natural description is (AT)3 -> (AT)2.
|
||||
*/
|
||||
|
||||
byte[] longB;
|
||||
// find first repeat unit based on either ref or alt, whichever is longer
|
||||
if (altBases.length > refBases.length)
|
||||
longB = altBases;
|
||||
else
|
||||
longB = refBases;
|
||||
|
||||
// see if non-null allele (either ref or alt, whichever is longer) can be decomposed into several identical tandem units
|
||||
// for example, -*,CACA needs to first be decomposed into (CA)2
|
||||
final int repeatUnitLength = findRepeatedSubstring(longB);
|
||||
final byte[] repeatUnit = Arrays.copyOf(longB, repeatUnitLength);
|
||||
|
||||
final int[] repetitionCount = new int[2];
|
||||
// look for repetitions forward on the ref bases (i.e. starting at beginning of ref bases)
|
||||
int repetitionsInRef = findNumberofRepetitions(repeatUnit,refBases, true);
|
||||
repetitionCount[0] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(refBases, remainingRefContext), true)-repetitionsInRef;
|
||||
repetitionCount[1] = findNumberofRepetitions(repeatUnit, ArrayUtils.addAll(altBases, remainingRefContext), true)-repetitionsInRef;
|
||||
|
||||
return new Pair<int[], byte[]>(repetitionCount, repeatUnit);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Find out if a string can be represented as a tandem number of substrings.
|
||||
* For example ACTACT is a 2-tandem of ACT,
|
||||
* but ACTACA is not.
|
||||
*
|
||||
* @param bases String to be tested
|
||||
* @return Length of repeat unit, if string can be represented as tandem of substring (if it can't
|
||||
* be represented as one, it will be just the length of the input string)
|
||||
*/
|
||||
public static int findRepeatedSubstring(byte[] bases) {
|
||||
|
||||
int repLength;
|
||||
for (repLength=1; repLength <=bases.length; repLength++) {
|
||||
final byte[] candidateRepeatUnit = Arrays.copyOf(bases,repLength);
|
||||
boolean allBasesMatch = true;
|
||||
for (int start = repLength; start < bases.length; start += repLength ) {
|
||||
// check that remaining of string is exactly equal to repeat unit
|
||||
final byte[] basePiece = Arrays.copyOfRange(bases,start,start+candidateRepeatUnit.length);
|
||||
if (!Arrays.equals(candidateRepeatUnit, basePiece)) {
|
||||
allBasesMatch = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (allBasesMatch)
|
||||
return repLength;
|
||||
}
|
||||
|
||||
return repLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper routine that finds number of repetitions a string consists of.
|
||||
* For example, for string ATAT and repeat unit AT, number of repetitions = 2
|
||||
* @param repeatUnit Substring
|
||||
* @param testString String to test
|
||||
* @oaram lookForward Look for repetitions forward (at beginning of string) or backward (at end of string)
|
||||
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
|
||||
*/
|
||||
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString, boolean lookForward) {
|
||||
int numRepeats = 0;
|
||||
if (lookForward) {
|
||||
// look forward on the test string
|
||||
for (int start = 0; start < testString.length; start += repeatUnit.length) {
|
||||
int end = start + repeatUnit.length;
|
||||
byte[] unit = Arrays.copyOfRange(testString,start, end);
|
||||
if(Arrays.equals(unit,repeatUnit))
|
||||
numRepeats++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
// look backward. For example, if repeatUnit = AT and testString = GATAT, number of repeat units is still 2
|
||||
// look forward on the test string
|
||||
for (int start = testString.length - repeatUnit.length; start >= 0; start -= repeatUnit.length) {
|
||||
int end = start + repeatUnit.length;
|
||||
byte[] unit = Arrays.copyOfRange(testString,start, end);
|
||||
if(Arrays.equals(unit,repeatUnit))
|
||||
numRepeats++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function for isTandemRepeat that checks that allele matches somewhere on the reference
|
||||
* @param ref
|
||||
* @param alt
|
||||
* @param refBasesStartingAtVCWithoutPad
|
||||
* @return
|
||||
*/
|
||||
protected static boolean isRepeatAllele(final Allele ref, final Allele alt, final String refBasesStartingAtVCWithoutPad) {
|
||||
if ( ! Allele.oneIsPrefixOfOther(ref, alt) )
|
||||
return false; // we require one allele be a prefix of another
|
||||
|
||||
if ( ref.length() > alt.length() ) { // we are a deletion
|
||||
return basesAreRepeated(ref.getBaseString(), alt.getBaseString(), refBasesStartingAtVCWithoutPad, 2);
|
||||
} else { // we are an insertion
|
||||
return basesAreRepeated(alt.getBaseString(), ref.getBaseString(), refBasesStartingAtVCWithoutPad, 1);
|
||||
}
|
||||
}
|
||||
|
||||
protected static boolean basesAreRepeated(final String l, final String s, final String ref, final int minNumberOfMatches) {
|
||||
final String potentialRepeat = l.substring(s.length()); // skip s bases
|
||||
|
||||
for ( int i = 0; i < minNumberOfMatches; i++) {
|
||||
final int start = i * potentialRepeat.length();
|
||||
final int end = (i+1) * potentialRepeat.length();
|
||||
if ( ref.length() < end )
|
||||
return false; // we ran out of bases to test
|
||||
final String refSub = ref.substring(start, end);
|
||||
if ( ! refSub.equals(potentialRepeat) )
|
||||
return false; // repeat didn't match, fail
|
||||
}
|
||||
|
||||
return true; // we passed all tests, we matched
|
||||
}
|
||||
|
||||
/**
 * Subset the Variant Context to the specific set of alleles passed in (pruning the PLs
 * appropriately). Samples without likelihoods, or whose subsetted likelihoods carry no
 * information, are emitted as no-calls.
 *
 * @param vc               variant context with genotype likelihoods
 * @param allelesToUse     which alleles from the vc are okay to use; *** must be in the same relative order as those in the original VC ***
 * @param assignGenotypes  true if we should update the genotypes based on the (subsetted) PLs
 * @return genotypes restricted to allelesToUse
 */
public static GenotypesContext subsetDiploidAlleles(final VariantContext vc,
                                                    final List<Allele> allelesToUse,
                                                    final boolean assignGenotypes) {

    // the genotypes with PLs
    final GenotypesContext oldGTs = vc.getGenotypes();

    // samples, in a deterministic (name-sorted) order
    final List<String> sampleIndices = oldGTs.getSampleNamesOrderedByName();

    // the new genotypes to create
    final GenotypesContext newGTs = GenotypesContext.create();

    // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
    final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
    final int numNewAltAlleles = allelesToUse.size() - 1;

    // which PL entries should be carried forward? null means "keep the PL vector as-is"
    ArrayList<Integer> likelihoodIndexesToUse = null;

    // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
    // then we can keep the PLs as is; otherwise, we determine which ones to keep
    if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
        likelihoodIndexesToUse = new ArrayList<Integer>(30);

        // mark which original alt-allele indices survive the subsetting
        final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles];
        for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
            if ( allelesToUse.contains(vc.getAlternateAllele(i)) )
                altAlleleIndexToUse[i] = true;
        }

        // numLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
        final int numLikelihoods = GenotypeLikelihoods.numLikelihoods(1 + numOriginalAltAlleles, DEFAULT_PLOIDY);
        for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
            // map the flat PL index back to its (alleleIndex1, alleleIndex2) genotype pair;
            // index 0 is the reference allele
            final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
            // consider this entry only if both of the alleles are good
            if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) )
                likelihoodIndexesToUse.add(PLindex);
        }
    }

    // create the new genotypes
    for ( int k = 0; k < oldGTs.size(); k++ ) {
        final Genotype g = oldGTs.get(sampleIndices.get(k));
        if ( !g.hasLikelihoods() ) {
            // no PLs to work with: emit a no-call for this sample
            newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
            continue;
        }

        // create the new likelihoods array from the alleles we are allowed to use
        final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
        double[] newLikelihoods;
        if ( likelihoodIndexesToUse == null ) {
            newLikelihoods = originalLikelihoods;
        } else {
            newLikelihoods = new double[likelihoodIndexesToUse.size()];
            int newIndex = 0;
            for ( int oldIndex : likelihoodIndexesToUse )
                newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];

            // might need to re-normalize
            newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
        }

        // if there is no mass on the (new) likelihoods, then just no-call the sample
        // NOTE(review): the direction of this comparison depends on the sign/meaning of
        // SUM_GL_THRESH_NOCALL, which is declared elsewhere in this class — presumably a
        // small negative log10 threshold for "flat" likelihoods; confirm against its definition
        if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
            newGTs.add(GenotypeBuilder.create(g.getSampleName(), NO_CALL_ALLELES));
        }
        else {
            final GenotypeBuilder gb = new GenotypeBuilder(g);

            // a pure-reference subset carries no alt PLs worth keeping
            if ( numNewAltAlleles == 0 )
                gb.noPL();
            else
                gb.PL(newLikelihoods);

            // if we weren't asked to assign a genotype, then just no-call the sample
            if ( !assignGenotypes || MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
                gb.alleles(NO_CALL_ALLELES);
            }
            else {
                // find the genotype with maximum likelihood and translate its PL index
                // back into a pair of alleles from the (ordered) subset
                int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
                GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);

                gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
                if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
            }
            newGTs.add(gb.make());
        }
    }

    return newGTs;
}
|
||||
|
||||
/**
|
||||
* Assign genotypes (GTs) to the samples in the Variant Context greedily based on the PLs
|
||||
*
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @return genotypes context
|
||||
*/
|
||||
public static GenotypesContext assignDiploidGenotypes(final VariantContext vc) {
|
||||
return subsetDiploidAlleles(vc, vc.getAlleles(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split variant context into its biallelic components if there are more than 2 alleles
|
||||
*
|
||||
* For VC has A/B/C alleles, returns A/B and A/C contexts.
|
||||
* Genotypes are all no-calls now (it's not possible to fix them easily)
|
||||
* Alleles are right trimmed to satisfy VCF conventions
|
||||
*
|
||||
* If vc is biallelic or non-variant it is just returned
|
||||
*
|
||||
* Chromosome counts are updated (but they are by definition 0)
|
||||
*
|
||||
* @param vc a potentially multi-allelic variant context
|
||||
* @return a list of bi-allelic (or monomorphic) variant context
|
||||
*/
|
||||
public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
|
||||
if ( ! vc.isVariant() || vc.isBiallelic() )
|
||||
// non variant or biallelics already satisfy the contract
|
||||
return Collections.singletonList(vc);
|
||||
else {
|
||||
final List<VariantContext> biallelics = new LinkedList<VariantContext>();
|
||||
|
||||
for ( final Allele alt : vc.getAlternateAlleles() ) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc);
|
||||
final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
|
||||
builder.alleles(alleles);
|
||||
builder.genotypes(subsetDiploidAlleles(vc, alleles, false));
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
biallelics.add(reverseTrimAlleles(builder.make()));
|
||||
}
|
||||
|
||||
return biallelics;
|
||||
}
|
||||
}
|
||||
|
||||
public static Genotype removePLsAndAD(final Genotype g) {
|
||||
return ( g.hasLikelihoods() || g.hasAD() ) ? new GenotypeBuilder(g).noPL().noAD().make() : g;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||
* If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||
* the sample name
|
||||
*
|
||||
* @param unsortedVCs collection of unsorted VCs
|
||||
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
||||
* @param filteredRecordMergeType merge type for filtered records
|
||||
* @param genotypeMergeOptions merge option for genotypes
|
||||
* @param annotateOrigin should we annotate the set it came from?
|
||||
* @param printMessages should we print messages?
|
||||
* @param setKey the key name of the set
|
||||
* @param filteredAreUncalled are filtered records uncalled?
|
||||
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||
* @return new VariantContext representing the merge of unsortedVCs
|
||||
*/
|
||||
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
|
||||
final List<String> priorityListOfVCs,
|
||||
final FilteredRecordMergeType filteredRecordMergeType,
|
||||
final GenotypeMergeType genotypeMergeOptions,
|
||||
final boolean annotateOrigin,
|
||||
final boolean printMessages,
|
||||
final String setKey,
|
||||
final boolean filteredAreUncalled,
|
||||
final boolean mergeInfoWithMaxAC ) {
|
||||
int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size();
|
||||
return simpleMerge(unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC);
|
||||
}
|
||||
|
||||
/**
 * Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
 * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
 * the sample name.
 * simpleMerge does not verify any more unique sample names EVEN if genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE. One should use
 * SampleUtils.verifyUniqueSamplesNames to check that before using simpleMerge.
 *
 * @param unsortedVCs               collection of unsorted VCs
 * @param priorityListOfVCs         priority list detailing the order in which we should grab the VCs
 * @param originalNumOfVCs          number of original VariantContexts (must equal the priority list size when one is given)
 * @param filteredRecordMergeType   merge type for filtered records
 * @param genotypeMergeOptions      merge option for genotypes
 * @param annotateOrigin            should we annotate the set it came from?
 * @param printMessages             should we print messages?
 * @param setKey                    the key name of the set
 * @param filteredAreUncalled       are filtered records uncalled?
 * @param mergeInfoWithMaxAC        should we merge in info from the VC with maximum allele count?
 * @return new VariantContext representing the merge of unsortedVCs, or null if there is nothing to merge
 */
public static VariantContext simpleMerge(final Collection<VariantContext> unsortedVCs,
                                         final List<String> priorityListOfVCs,
                                         final int originalNumOfVCs,
                                         final FilteredRecordMergeType filteredRecordMergeType,
                                         final GenotypeMergeType genotypeMergeOptions,
                                         final boolean annotateOrigin,
                                         final boolean printMessages,
                                         final String setKey,
                                         final boolean filteredAreUncalled,
                                         final boolean mergeInfoWithMaxAC ) {

    if ( unsortedVCs == null || unsortedVCs.size() == 0 )
        return null;

    if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size())
        throw new IllegalArgumentException("the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list");

    if ( annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0)
        throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts");

    final List<VariantContext> preFilteredVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
    // Make sure all variant contexts are padded with reference base in case of indels if necessary
    final List<VariantContext> VCs = new ArrayList<VariantContext>();

    // optionally drop filtered records entirely
    for (final VariantContext vc : preFilteredVCs) {
        if ( ! filteredAreUncalled || vc.isNotFiltered() )
            VCs.add(vc);
    }
    if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
        return null;

    // establish the baseline info from the first VC
    final VariantContext first = VCs.get(0);
    final String name = first.getSource();
    final Allele refAllele = determineReferenceAllele(VCs);

    final Set<Allele> alleles = new LinkedHashSet<Allele>();
    final Set<String> filters = new HashSet<String>();
    final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
    final Set<String> inconsistentAttributes = new HashSet<String>();
    final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
    final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id

    VariantContext longestVC = first;
    int depth = 0;
    int maxAC = -1;
    final Map<String, Object> attributesWithMaxAC = new LinkedHashMap<String, Object>();
    double log10PError = CommonInfo.NO_LOG10_PERROR;
    VariantContext vcWithMaxAC = null;
    GenotypesContext genotypes = GenotypesContext.create();

    // counting the number of filtered and variant VCs
    int nFiltered = 0;

    boolean remapped = false;

    // cycle through and add info from the other VCs, making sure the loc/reference matches

    for ( final VariantContext vc : VCs ) {
        if ( longestVC.getStart() != vc.getStart() )
            throw new IllegalStateException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());

        if ( VariantContextUtils.getSize(vc) > VariantContextUtils.getSize(longestVC) )
            longestVC = vc; // get the longest location

        nFiltered += vc.isFiltered() ? 1 : 0;
        if ( vc.isVariant() ) variantSources.add(vc.getSource());

        // extend this VC's alleles to the longest reference allele if needed
        AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
        remapped = remapped || alleleMapping.needsRemapping();

        alleles.addAll(alleleMapping.values());

        mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);

        // We always take the QUAL of the first VC with a non-MISSING qual for the combined value
        if ( log10PError == CommonInfo.NO_LOG10_PERROR )
            log10PError = vc.getLog10PError();

        filters.addAll(vc.getFilters());

        //
        // add attributes
        //
        // special case DP (add it up) and ID (just preserve it)
        //
        if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
            depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
        if ( vc.hasID() ) rsIDs.add(vc.getID());
        if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
            String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
            // lets see if the string contains a , separator
            if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
                List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
                for (String alleleCount : alleleCountArray) {
                    final int ac = Integer.valueOf(alleleCount.trim());
                    if (ac > maxAC) {
                        maxAC = ac;
                        vcWithMaxAC = vc;
                    }
                }
            } else {
                final int ac = Integer.valueOf(rawAlleleCounts);
                if (ac > maxAC) {
                    maxAC = ac;
                    vcWithMaxAC = vc;
                }
            }
        }

        for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
            String key = p.getKey();
            // if we don't like the key already, don't go anywhere
            if ( ! inconsistentAttributes.contains(key) ) {
                final boolean alreadyFound = attributes.containsKey(key);
                final Object boundValue = attributes.get(key);
                final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);

                if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) {
                    // we found the value but we're inconsistent, put it in the exclude list
                    //System.out.printf("Inconsistent INFO values: %s => %s and %s%n", key, boundValue, p.getValue());
                    inconsistentAttributes.add(key);
                    attributes.remove(key);
                } else if ( ! alreadyFound || boundIsMissingValue )  { // no value
                    //if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(), p.getValue());
                    attributes.put(key, p.getValue());
                }
            }
        }
    }

    // if we have more alternate alleles in the merged VC than in one or more of the
    // original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well as allele-dependent attributes like AC,AF, and AD
    for ( final VariantContext vc : VCs ) {
        if (vc.getAlleles().size() == 1)
            continue;
        if ( hasPLIncompatibleAlleles(alleles, vc.getAlleles())) {
            if ( ! genotypes.isEmpty() ) {
                logger.debug(String.format("Stripping PLs at %s:%d-%d due to incompatible alleles merged=%s vs. single=%s",
                        vc.getChr(), vc.getStart(), vc.getEnd(), alleles, vc.getAlleles()));
            }
            genotypes = stripPLsAndAD(genotypes);
            // this will remove stale AC,AF attributed from vc
            VariantContextUtils.calculateChromosomeCounts(vc, attributes, true);
            break;
        }
    }

    // take the VC with the maxAC and pull the attributes into a modifiable map
    if ( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
        attributesWithMaxAC.putAll(vcWithMaxAC.getAttributes());
    }

    // if at least one record was unfiltered and we want a union, clear all of the filters
    if ( (filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size()) || filteredRecordMergeType == FilteredRecordMergeType.KEEP_UNCONDITIONAL )
        filters.clear();


    if ( annotateOrigin ) { // we care about where the call came from
        String setValue;
        if ( nFiltered == 0 && variantSources.size() == originalNumOfVCs ) // nothing was filtered and every original source is variant
            setValue = MERGE_INTERSECTION;
        else if ( nFiltered == VCs.size() ) // everything was filtered out
            setValue = MERGE_FILTER_IN_ALL;
        else if ( variantSources.isEmpty() ) // everyone was reference
            setValue = MERGE_REF_IN_ALL;
        else {
            // mixed case: list the variant sources, prefixing filtered ones
            final LinkedHashSet<String> s = new LinkedHashSet<String>();
            for ( final VariantContext vc : VCs )
                if ( vc.isVariant() )
                    s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
            setValue = Utils.join("-", s);
        }

        if ( setKey != null ) {
            attributes.put(setKey, setValue);
            if( mergeInfoWithMaxAC && vcWithMaxAC != null ) {
                attributesWithMaxAC.put(setKey, setValue);
            }
        }
    }

    if ( depth > 0 )
        attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));

    final String ID = rsIDs.isEmpty() ? VCFConstants.EMPTY_ID_FIELD : Utils.join(",", rsIDs);

    final VariantContextBuilder builder = new VariantContextBuilder().source(name).id(ID);
    builder.loc(longestVC.getChr(), longestVC.getStart(), longestVC.getEnd());
    builder.alleles(alleles);
    builder.genotypes(genotypes);
    builder.log10PError(log10PError);
    builder.filters(filters.isEmpty() ? filters : new TreeSet<String>(filters));
    builder.attributes(new TreeMap<String, Object>(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes));

    // Trim the padded bases of all alleles if necessary
    final VariantContext merged = builder.make();
    if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
    return merged;
}
|
||||
|
||||
private static final boolean hasPLIncompatibleAlleles(final Collection<Allele> alleleSet1, final Collection<Allele> alleleSet2) {
|
||||
final Iterator<Allele> it1 = alleleSet1.iterator();
|
||||
final Iterator<Allele> it2 = alleleSet2.iterator();
|
||||
|
||||
while ( it1.hasNext() && it2.hasNext() ) {
|
||||
final Allele a1 = it1.next();
|
||||
final Allele a2 = it2.next();
|
||||
if ( ! a1.equals(a2) )
|
||||
return true;
|
||||
}
|
||||
|
||||
// by this point, at least one of the iterators is empty. All of the elements
|
||||
// we've compared are equal up until this point. But it's possible that the
|
||||
// sets aren't the same size, which is indicated by the test below. If they
|
||||
// are of the same size, though, the sets are compatible
|
||||
return it1.hasNext() || it2.hasNext();
|
||||
}
|
||||
|
||||
public static GenotypesContext stripPLsAndAD(GenotypesContext genotypes) {
|
||||
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
||||
|
||||
for ( final Genotype g : genotypes ) {
|
||||
newGs.add(removePLsAndAD(g));
|
||||
}
|
||||
|
||||
return newGs;
|
||||
}
|
||||
|
||||
static private Allele determineReferenceAllele(List<VariantContext> VCs) {
|
||||
Allele ref = null;
|
||||
|
||||
for ( VariantContext vc : VCs ) {
|
||||
Allele myRef = vc.getReference();
|
||||
if ( ref == null || ref.length() < myRef.length() )
|
||||
ref = myRef;
|
||||
else if ( ref.length() == myRef.length() && ! ref.equals(myRef) )
|
||||
throw new TribbleException(String.format("The provided variant file(s) have inconsistent references for the same position(s) at %s:%d, %s vs. %s", vc.getChr(), vc.getStart(), ref, myRef));
|
||||
}
|
||||
|
||||
return ref;
|
||||
}
|
||||
|
||||
/**
 * Build an AlleleMapper translating the alleles of {@code vc} into the allele space of
 * the merged record, extending them with trailing reference bases when the merged
 * reference allele is longer than this context's reference allele.
 *
 * @param refAllele  the (longest) reference allele chosen for the merged record
 * @param vc         the context whose alleles may need extending
 * @param allAlleles alleles already collected for the merged record; reused when an
 *                   extended allele is equal to one of them (identity sharing)
 * @return an identity mapper when references match, otherwise an explicit allele map
 */
static private AlleleMapper resolveIncompatibleAlleles(Allele refAllele, VariantContext vc, Set<Allele> allAlleles) {
    if ( refAllele.equals(vc.getReference()) )
        return new AlleleMapper(vc);
    else {
        // we really need to do some work. The refAllele is the longest reference allele seen at this
        // start site. So imagine it is:
        //
        // refAllele: ACGTGA
        // myRef:     ACGT
        // myAlt:     A
        //
        // We need to remap all of the alleles in vc to include the extra GA so that
        // myRef => refAllele and myAlt => AGA
        //

        Allele myRef = vc.getReference();
        // determineReferenceAllele already picked the longest reference, so this should never fire
        if ( refAllele.length() <= myRef.length() ) throw new IllegalStateException("BUG: myRef="+myRef+" is longer than refAllele="+refAllele);
        // the reference bases this context is missing relative to the merged reference
        byte[] extraBases = Arrays.copyOfRange(refAllele.getBases(), myRef.length(), refAllele.length());

        Map<Allele, Allele> map = new HashMap<Allele, Allele>();
        for ( Allele a : vc.getAlleles() ) {
            if ( a.isReference() )
                map.put(a, refAllele);
            else {
                Allele extended = Allele.extend(a, extraBases);
                // prefer an already-collected equal allele so merged alleles are shared
                for ( Allele b : allAlleles )
                    if ( extended.equals(b) )
                        extended = b;
                map.put(a, extended);
            }
        }

        return new AlleleMapper(map);
    }
}
|
||||
|
||||
public static List<VariantContext> sortVariantContextsByPriority(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs, GenotypeMergeType mergeOption ) {
|
||||
if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null )
|
||||
throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");
|
||||
|
||||
if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED )
|
||||
return new ArrayList<VariantContext>(unsortedVCs);
|
||||
else {
|
||||
ArrayList<VariantContext> sorted = new ArrayList<VariantContext>(unsortedVCs);
|
||||
Collections.sort(sorted, new CompareByPriority(priorityListOfVCs));
|
||||
return sorted;
|
||||
}
|
||||
}
|
||||
|
||||
private static void mergeGenotypes(GenotypesContext mergedGenotypes, VariantContext oneVC, AlleleMapper alleleMapping, boolean uniqifySamples) {
|
||||
//TODO: should we add a check for cases when the genotypeMergeOption is REQUIRE_UNIQUE
|
||||
for ( Genotype g : oneVC.getGenotypes() ) {
|
||||
String name = mergedSampleName(oneVC.getSource(), g.getSampleName(), uniqifySamples);
|
||||
if ( ! mergedGenotypes.containsSample(name) ) {
|
||||
// only add if the name is new
|
||||
Genotype newG = g;
|
||||
|
||||
if ( uniqifySamples || alleleMapping.needsRemapping() ) {
|
||||
final List<Allele> alleles = alleleMapping.needsRemapping() ? alleleMapping.remap(g.getAlleles()) : g.getAlleles();
|
||||
newG = new GenotypeBuilder(g).name(name).alleles(alleles).make();
|
||||
}
|
||||
|
||||
mergedGenotypes.add(newG);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static String mergedSampleName(String trackName, String sampleName, boolean uniqify ) {
|
||||
return uniqify ? sampleName + "." + trackName : sampleName;
|
||||
}
|
||||
|
||||
/**
 * Trim bases common to the right end of all alleles (per VCF conventions), rebuilding
 * the alleles, the genotypes, and the stop position accordingly. Symbolic alleles are
 * passed through untouched; a context that needs no trimming is returned unchanged.
 *
 * @param inputVC the context to trim
 * @return the trimmed context, or inputVC itself when no trimming applies
 */
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {

    // see whether we need to trim common reference base from all alleles
    final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
    if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
        return inputVC;

    final List<Allele> alleles = new ArrayList<Allele>();
    final GenotypesContext genotypes = GenotypesContext.create();
    // remember which trimmed allele replaced each original so genotypes can be rebuilt
    final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();

    for (final Allele a : inputVC.getAlleles()) {
        if (a.isSymbolic()) {
            // symbolic alleles have no concrete bases to trim
            alleles.add(a);
            originalToTrimmedAlleleMap.put(a, a);
        } else {
            // get bases for current allele and create a new one with trimmed bases
            final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
            final Allele trimmedAllele = Allele.create(newBases, a.isReference());
            alleles.add(trimmedAllele);
            originalToTrimmedAlleleMap.put(a, trimmedAllele);
        }
    }

    // now we can recreate new genotypes with trimmed alleles
    for ( final Genotype genotype : inputVC.getGenotypes() ) {
        final List<Allele> originalAlleles = genotype.getAlleles();
        final List<Allele> trimmedAlleles = new ArrayList<Allele>();
        for ( final Allele a : originalAlleles ) {
            if ( a.isCalled() )
                trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
            else
                trimmedAlleles.add(Allele.NO_CALL);
        }
        genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
    }

    // stop is recomputed from the (trimmed) reference allele, which is alleles.get(0)
    // NOTE(review): this presumes the reference allele is first in getAlleles() — confirm
    return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() - 1).alleles(alleles).genotypes(genotypes).make();
}
|
||||
|
||||
/**
 * Compute how many bases can be clipped from the right end of every allele
 * simultaneously (i.e., bases common to all alleles and matching the reference tail).
 *
 * @param unclippedAlleles alleles to inspect; symbolic alleles are ignored
 * @param ref              reference bases used to validate the clipped tail
 * @param forwardClipping  number of bases already clipped from the front
 * @param allowFullClip    whether an allele may be clipped down to zero length
 * @return the number of bases to clip from the right; may be reduced by one (or return
 *         -1) to avoid fully clipping an allele/reference when allowFullClip is false
 */
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
                                         final byte[] ref,
                                         final int forwardClipping,
                                         final boolean allowFullClip) {
    int clipping = 0;
    boolean stillClipping = true;

    // advance one base at a time while every allele still agrees with the reference tail
    while ( stillClipping ) {
        for ( final Allele a : unclippedAlleles ) {
            if ( a.isSymbolic() )
                continue;

            // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
            // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
            if ( a.length() - clipping == 0 )
                return clipping - (allowFullClip ? 0 : 1);

            // stop when further clipping would collide with the forward-clipped region
            if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
                stillClipping = false;
            }
            else if ( ref.length == clipping ) {
                // ran out of reference bases to compare against
                if ( allowFullClip )
                    stillClipping = false;
                else
                    return -1;
            }
            else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
                // this allele's tail base no longer matches the reference tail
                stillClipping = false;
            }
        }
        if ( stillClipping )
            clipping++;
    }

    return clipping;
}
|
||||
|
||||
public static double computeHardyWeinbergPvalue(VariantContext vc) {
|
||||
if ( vc.getCalledChrCount() == 0 )
|
||||
return 0.0;
|
||||
return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount());
|
||||
}
|
||||
|
||||
public static boolean requiresPaddingBase(final List<String> alleles) {
|
||||
|
||||
// see whether one of the alleles would be null if trimmed through
|
||||
|
||||
for ( final String allele : alleles ) {
|
||||
if ( allele.isEmpty() )
|
||||
return true;
|
||||
}
|
||||
|
||||
int clipping = 0;
|
||||
Character currentBase = null;
|
||||
|
||||
while ( true ) {
|
||||
for ( final String allele : alleles ) {
|
||||
if ( allele.length() - clipping == 0 )
|
||||
return true;
|
||||
|
||||
char myBase = allele.charAt(clipping);
|
||||
if ( currentBase == null )
|
||||
currentBase = myBase;
|
||||
else if ( currentBase != myBase )
|
||||
return false;
|
||||
}
|
||||
|
||||
clipping++;
|
||||
currentBase = null;
|
||||
}
|
||||
}
|
||||
|
||||
private static class AlleleMapper {
|
||||
private VariantContext vc = null;
|
||||
private Map<Allele, Allele> map = null;
|
||||
public AlleleMapper(VariantContext vc) { this.vc = vc; }
|
||||
public AlleleMapper(Map<Allele, Allele> map) { this.map = map; }
|
||||
public boolean needsRemapping() { return this.map != null; }
|
||||
public Collection<Allele> values() { return map != null ? map.values() : vc.getAlleles(); }
|
||||
public Allele remap(Allele a) { return map != null && map.containsKey(a) ? map.get(a) : a; }
|
||||
|
||||
public List<Allele> remap(List<Allele> as) {
|
||||
List<Allele> newAs = new ArrayList<Allele>();
|
||||
for ( Allele a : as ) {
|
||||
//System.out.printf(" Remapping %s => %s%n", a, remap(a));
|
||||
newAs.add(remap(a));
|
||||
}
|
||||
return newAs;
|
||||
}
|
||||
}
|
||||
|
||||
private static class CompareByPriority implements Comparator<VariantContext>, Serializable {
|
||||
List<String> priorityListOfVCs;
|
||||
public CompareByPriority(List<String> priorityListOfVCs) {
|
||||
this.priorityListOfVCs = priorityListOfVCs;
|
||||
}
|
||||
|
||||
private int getIndex(VariantContext vc) {
|
||||
int i = priorityListOfVCs.indexOf(vc.getSource());
|
||||
if ( i == -1 ) throw new IllegalArgumentException("Priority list " + priorityListOfVCs + " doesn't contain variant context " + vc.getSource());
|
||||
return i;
|
||||
}
|
||||
|
||||
public int compare(VariantContext vc1, VariantContext vc2) {
|
||||
return Integer.valueOf(getIndex(vc1)).compareTo(getIndex(vc2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,8 +26,6 @@
|
|||
package org.broadinstitute.variant.utils;
|
||||
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
|
@ -176,7 +174,7 @@ public class BaseUtils {
|
|||
if ( baseIndex == Base.N.ordinal() ) {
|
||||
bases[i] = 'N';
|
||||
} else if ( errorOnBadReferenceBase && baseIndex == -1 ) {
|
||||
throw new UserException.BadInput("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'");
|
||||
throw new IllegalStateException("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'");
|
||||
}
|
||||
}
|
||||
return bases;
|
||||
|
|
@ -517,7 +515,7 @@ public class BaseUtils {
|
|||
case 'N':
|
||||
return 'N';
|
||||
default:
|
||||
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
|
||||
throw new IllegalArgumentException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -141,13 +141,6 @@ public class GeneralUtils {
|
|||
return normalized;
|
||||
}
|
||||
|
||||
public static double sum(double[] values) {
|
||||
double s = 0.0;
|
||||
for (double v : values)
|
||||
s += v;
|
||||
return s;
|
||||
}
|
||||
|
||||
public static double arrayMax(final double[] array) {
|
||||
return array[maxElementIndex(array, array.length)];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ import java.util.*;
|
|||
*
|
||||
* @author depristo
|
||||
*/
|
||||
final class CommonInfo {
|
||||
public final class CommonInfo {
|
||||
public static final double NO_LOG10_PERROR = 1.0;
|
||||
|
||||
private static Set<String> NO_FILTERS = Collections.emptySet();
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -28,15 +28,9 @@ package org.broadinstitute.variant.vcf;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.variant.utils.GeneralUtils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class VCFUtils {
|
||||
|
|
@ -106,21 +100,6 @@ public class VCFUtils {
|
|||
return new HashSet<VCFHeaderLine>(map.values());
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealVariant(List<VariantContext> VCs, VariantContext.Type type) {
|
||||
if ( VCs == null )
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( vc.getType() == type ) {
|
||||
rsID = vc.getID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return rsID;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add / replace the contig header lines in the VCFHeader with the in the reference file and master reference dictionary
|
||||
*
|
||||
|
|
@ -198,35 +177,6 @@ public class VCFUtils {
|
|||
return assembly;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all of the VCF records from source into memory, returning the header and the VariantContexts
|
||||
*
|
||||
* @param source the file to read, must be in VCF4 format
|
||||
* @return
|
||||
* @throws java.io.IOException
|
||||
*/
|
||||
public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
|
||||
// read in the features
|
||||
final List<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
final VCFCodec codec = new VCFCodec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(source));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
|
||||
|
||||
while ( ! pbs.isDone() ) {
|
||||
final VariantContext vc = codec.decode(pbs);
|
||||
if ( vc != null )
|
||||
vcs.add(vc);
|
||||
}
|
||||
|
||||
return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
|
||||
}
|
||||
|
||||
/** Only displays a warning if warnings are enabled and an identical warning hasn't been already issued */
|
||||
private static final class HeaderConflictWarner {
|
||||
boolean emitWarnings;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.variant.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.variant.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
|
|
|||
|
|
@ -23,8 +23,9 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting;
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import com.google.caliper.Param;
|
||||
import com.google.caliper.SimpleBenchmark;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Caliper microbenchmark of genome loc parser
|
||||
*/
|
||||
public class GenomeLocParserBenchmark extends SimpleBenchmark {
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private final int ITERATIONS = 1000000;
|
||||
|
||||
@Param({"NEW", "NONE"})
|
||||
GenomeLocParser.ValidationLevel validationLevel; // set automatically by framework
|
||||
|
||||
@Param({"true", "false"})
|
||||
boolean useContigIndex; // set automatically by framework
|
||||
|
||||
@Override protected void setUp() throws Exception {
|
||||
seq = new CachingIndexedFastaSequenceFile(new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta"));
|
||||
}
|
||||
//
|
||||
// public void timeSequentialCreationFromGenomeLoc(int rep) {
|
||||
// final GenomeLocParser genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary(), validationLevel);
|
||||
// GenomeLoc last = genomeLocParser.createGenomeLoc("1", 1, 1);
|
||||
// for ( int i = 0; i < rep; i++ ) {
|
||||
// for ( int j = 1; j < ITERATIONS; j++ ) {
|
||||
// if ( useContigIndex )
|
||||
// last = genomeLocParser.createGenomeLoc(last.getContig(), last.getContigIndex(), last.getStart() + 1);
|
||||
// else
|
||||
// last = genomeLocParser.createGenomeLoc(last.getContig(), last.getStart() + 1);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public void timeSequentialCreationFromGenomeLocOriginal(int rep) {
|
||||
// final GenomeLocParserOriginal genomeLocParser = new GenomeLocParserOriginal(seq.getSequenceDictionary());
|
||||
// GenomeLoc last = genomeLocParser.createGenomeLoc("1", 1, 1);
|
||||
// for ( int i = 0; i < rep; i++ ) {
|
||||
// for ( int j = 1; j < ITERATIONS; j++ ) {
|
||||
// if ( useContigIndex )
|
||||
// last = genomeLocParser.createGenomeLoc(last.getContig(), last.getContigIndex(), last.getStart() + 1);
|
||||
// else
|
||||
// last = genomeLocParser.createGenomeLoc(last.getContig(), last.getStart() + 1);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
public static void main(String[] args) {
|
||||
com.google.caliper.Runner.main(GenomeLocParserBenchmark.class, args);
|
||||
}
|
||||
}
|
||||
|
|
@ -29,17 +29,31 @@ package org.broadinstitute.sting.utils;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broad.tribble.BasicFeature;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
|
|
@ -49,10 +63,11 @@ import org.testng.annotations.Test;
|
|||
*/
|
||||
public class GenomeLocParserUnitTest extends BaseTest {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private SAMFileHeader header;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
|
|
@ -231,7 +246,16 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
|
||||
assertTrue( genomeLocParser.isValidGenomeLoc("chr1",-1,2, false)); // bad stop
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
|
||||
assertTrue( genomeLocParser.isValidGenomeLoc("chr1",10,11, false)); // bad start, past end
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",2,1)); // stop < start
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testValidateGenomeLoc() {
|
||||
// bad contig index
|
||||
genomeLocParser.validateGenomeLoc("chr1", 1, 1, 2, false);
|
||||
}
|
||||
|
||||
private static class FlankingGenomeLocTestData extends TestDataProvider {
|
||||
|
|
@ -333,4 +357,153 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
data.toString(), data.original, actual, data.flankStop);
|
||||
assertEquals(actual, data.flankStop, description);
|
||||
}
|
||||
|
||||
@DataProvider(name = "parseGenomeLoc")
|
||||
public Object[][] makeParsingTest() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{ "chr1:10", "chr1", 10 });
|
||||
tests.add(new Object[]{ "chr1:100", "chr1", 100 });
|
||||
tests.add(new Object[]{ "chr1:1000", "chr1", 1000 });
|
||||
tests.add(new Object[]{ "chr1:1,000", "chr1", 1000 });
|
||||
tests.add(new Object[]{ "chr1:10000", "chr1", 10000 });
|
||||
tests.add(new Object[]{ "chr1:10,000", "chr1", 10000 });
|
||||
tests.add(new Object[]{ "chr1:100000", "chr1", 100000 });
|
||||
tests.add(new Object[]{ "chr1:100,000", "chr1", 100000 });
|
||||
tests.add(new Object[]{ "chr1:1000000", "chr1", 1000000 });
|
||||
tests.add(new Object[]{ "chr1:1,000,000", "chr1", 1000000 });
|
||||
tests.add(new Object[]{ "chr1:1000,000", "chr1", 1000000 });
|
||||
tests.add(new Object[]{ "chr1:1,000000", "chr1", 1000000 });
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test( dataProvider = "parseGenomeLoc")
|
||||
public void testParsingPositions(final String string, final String contig, final int start) {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10000000);
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
final GenomeLoc loc = genomeLocParser.parseGenomeLoc(string);
|
||||
Assert.assertEquals(loc.getContig(), contig);
|
||||
Assert.assertEquals(loc.getStart(), start);
|
||||
Assert.assertEquals(loc.getStop(), start);
|
||||
}
|
||||
|
||||
@Test( )
|
||||
public void testCreationFromSAMRecord() {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
||||
Assert.assertEquals(loc.getContig(), read.getReferenceName());
|
||||
Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex());
|
||||
Assert.assertEquals(loc.getStart(), read.getAlignmentStart());
|
||||
Assert.assertEquals(loc.getStop(), read.getAlignmentEnd());
|
||||
}
|
||||
|
||||
@Test( )
|
||||
public void testCreationFromSAMRecordUnmapped() {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
|
||||
read.setReadUnmappedFlag(true);
|
||||
read.setReferenceIndex(-1);
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
||||
Assert.assertTrue(loc.isUnmapped());
|
||||
}
|
||||
|
||||
@Test( )
|
||||
public void testCreationFromSAMRecordUnmappedButOnGenome() {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
|
||||
read.setReadUnmappedFlag(true);
|
||||
read.setCigarString("*");
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
||||
Assert.assertEquals(loc.getContig(), read.getReferenceName());
|
||||
Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex());
|
||||
Assert.assertEquals(loc.getStart(), read.getAlignmentStart());
|
||||
Assert.assertEquals(loc.getStop(), read.getAlignmentStart());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreationFromFeature() {
|
||||
final Feature feature = new BasicFeature("chr1", 1, 5);
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(feature);
|
||||
Assert.assertEquals(loc.getContig(), feature.getChr());
|
||||
Assert.assertEquals(loc.getStart(), feature.getStart());
|
||||
Assert.assertEquals(loc.getStop(), feature.getEnd());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreationFromVariantContext() {
|
||||
final VariantContext feature = new VariantContextBuilder("x", "chr1", 1, 5, Arrays.asList(Allele.create("AAAAA", true))).make();
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(feature);
|
||||
Assert.assertEquals(loc.getContig(), feature.getChr());
|
||||
Assert.assertEquals(loc.getStart(), feature.getStart());
|
||||
Assert.assertEquals(loc.getStop(), feature.getEnd());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testcreateGenomeLocOnContig() throws FileNotFoundException {
|
||||
final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final SAMSequenceDictionary dict = seq.getSequenceDictionary();
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(dict);
|
||||
|
||||
for ( final SAMSequenceRecord rec : dict.getSequences() ) {
|
||||
final GenomeLoc loc = genomeLocParser.createOverEntireContig(rec.getSequenceName());
|
||||
Assert.assertEquals(loc.getContig(), rec.getSequenceName());
|
||||
Assert.assertEquals(loc.getStart(), 1);
|
||||
Assert.assertEquals(loc.getStop(), rec.getSequenceLength());
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "GenomeLocOnContig")
|
||||
public Object[][] makeGenomeLocOnContig() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final int contigLength = header.getSequence(0).getSequenceLength();
|
||||
for ( int start = -10; start < contigLength + 10; start++ ) {
|
||||
for ( final int len : Arrays.asList(1, 10, 20) ) {
|
||||
tests.add(new Object[]{ "chr1", start, start + len });
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test( dataProvider = "GenomeLocOnContig")
|
||||
public void testGenomeLocOnContig(final String contig, final int start, final int stop) {
|
||||
final int contigLength = header.getSequence(0).getSequenceLength();
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLocOnContig(contig, start, stop);
|
||||
|
||||
if ( stop < 1 || start > contigLength )
|
||||
Assert.assertNull(loc, "GenomeLoc should be null if the start/stops are not meaningful");
|
||||
else {
|
||||
Assert.assertNotNull(loc);
|
||||
Assert.assertEquals(loc.getContig(), contig);
|
||||
Assert.assertEquals(loc.getStart(), Math.max(start, 1));
|
||||
Assert.assertEquals(loc.getStop(), Math.min(stop, contigLength));
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "GenomeLocPadding")
|
||||
public Object[][] makeGenomeLocPadding() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final int contigLength = header.getSequence(0).getSequenceLength();
|
||||
for ( int pad = 0; pad < contigLength + 1; pad++) {
|
||||
for ( int start = 1; start < contigLength; start++ ) {
|
||||
for ( int stop = start; stop < contigLength; stop++ ) {
|
||||
tests.add(new Object[]{ genomeLocParser.createGenomeLoc("chr1", start, stop), pad});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test( dataProvider = "GenomeLocPadding")
|
||||
public void testGenomeLocPadding(final GenomeLoc input, final int pad) {
|
||||
final int contigLength = header.getSequence(0).getSequenceLength();
|
||||
final GenomeLoc padded = genomeLocParser.createPaddedGenomeLoc(input, pad);
|
||||
|
||||
Assert.assertNotNull(padded);
|
||||
Assert.assertEquals(padded.getContig(), input.getContig());
|
||||
Assert.assertEquals(padded.getStart(), Math.max(input.getStart() - pad, 1));
|
||||
Assert.assertEquals(padded.getStop(), Math.min(input.getStop() + pad, contigLength));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
public class MRUCachingSAMSequencingDictionaryUnitTest extends BaseTest {
|
||||
private static ReferenceSequenceFile seq;
|
||||
private static SAMSequenceDictionary dict;
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
// sequence
|
||||
seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
dict = seq.getSequenceDictionary();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasic() {
|
||||
final MRUCachingSAMSequenceDictionary caching = new MRUCachingSAMSequenceDictionary(dict);
|
||||
|
||||
Assert.assertEquals(caching.getDictionary(), dict, "Dictionary not the one I expected");
|
||||
|
||||
for ( final SAMSequenceRecord rec : dict.getSequences() ) {
|
||||
Assert.assertFalse(caching.isCached(rec.getSequenceIndex()), "Expected index to not be cached");
|
||||
Assert.assertFalse(caching.isCached(rec.getSequenceName()), "Expected contig to not be cached");
|
||||
|
||||
Assert.assertEquals(caching.getSequence(rec.getSequenceName()), rec, "Couldn't query for sequence");
|
||||
Assert.assertEquals(caching.getSequence(rec.getSequenceIndex()), rec, "Couldn't query for sequence index");
|
||||
Assert.assertEquals(caching.hasContig(rec.getSequenceName()), true, "hasContig query for sequence");
|
||||
Assert.assertEquals(caching.hasContigIndex(rec.getSequenceIndex()), true, "hasContigIndex query for sequence");
|
||||
Assert.assertEquals(caching.getSequenceIndex(rec.getSequenceName()), rec.getSequenceIndex(), "Couldn't query for sequence");
|
||||
|
||||
Assert.assertEquals(caching.hasContig(rec.getSequenceName() + "asdfadsfa"), false, "hasContig query for unknown sequence");
|
||||
Assert.assertEquals(caching.hasContigIndex(dict.getSequences().size()), false, "hasContigIndex query for unknown index");
|
||||
|
||||
Assert.assertTrue(caching.isCached(rec.getSequenceIndex()), "Expected index to be cached");
|
||||
Assert.assertTrue(caching.isCached(rec.getSequenceName()), "Expected contig to be cached");
|
||||
}
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testBadGetSequence() {
|
||||
final MRUCachingSAMSequenceDictionary caching = new MRUCachingSAMSequenceDictionary(dict);
|
||||
caching.getSequence("notInDictionary");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testBadGetSequenceIndex() {
|
||||
final MRUCachingSAMSequenceDictionary caching = new MRUCachingSAMSequenceDictionary(dict);
|
||||
caching.getSequence(dict.getSequences().size());
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue