Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
cba5f1a8b1
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -39,6 +38,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
// TODO: PERMITS WALKER USED TO HAVE A TEMPORARY FIX to prevent NullPointerException caused by bug:
|
||||
public static boolean PRESERVE_AC_DATA = false;
|
||||
|
||||
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||
super(UAC, N, logger, verboseWriter);
|
||||
|
|
@ -51,7 +52,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
final int numAlleles = alleles.size();
|
||||
|
||||
//linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, result, false);
|
||||
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, result, PRESERVE_AC_DATA);
|
||||
}
|
||||
|
||||
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||
|
|
|
|||
|
|
@ -1,114 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
|
||||
* Uses the UG engine to determine per-sample genotype likelihoods and emits them as a VCF (using PLs).
|
||||
* Absolutely not supported or recommended for public use.
|
||||
* Run this as you would the UnifiedGenotyper, except that you must additionally pass in a VCF bound to
|
||||
* the name 'allele' so we know which alternate allele to use at each site.
|
||||
*/
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@Reference(window=@Window(start=-200,stop=200))
|
||||
@By(DataSource.READS)
|
||||
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
|
||||
public class UGCalcLikelihoods extends LocusWalker<VariantCallContext, Integer> implements TreeReducible<Integer> {
|
||||
|
||||
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
|
||||
// control the output
|
||||
@Output(doc="File to which variants should be written",required=true)
|
||||
protected VCFWriter writer = null;
|
||||
|
||||
// the calculation arguments
|
||||
private UnifiedGenotyperEngine UG_engine = null;
|
||||
|
||||
// enable deletions in the pileup
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
// enable extended events for indels
|
||||
public boolean generateExtendedEvents() { return UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP; }
|
||||
|
||||
public void initialize() {
|
||||
// get all of the unique sample names
|
||||
Set<String> samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);
|
||||
|
||||
// initialize the header
|
||||
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
|
||||
writer.writeHeader(new VCFHeader(headerInfo, samples)) ;
|
||||
}
|
||||
|
||||
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||
VariantContext call = UG_engine.calculateLikelihoods(tracker, refContext, rawContext);
|
||||
return call == null ? null : new VariantCallContext(call, true);
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer treeReduce(Integer lhs, Integer rhs) {
|
||||
return lhs + rhs;
|
||||
}
|
||||
|
||||
public Integer reduce(VariantCallContext value, Integer sum) {
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
try {
|
||||
writer.add(value);
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
|
||||
}
|
||||
|
||||
return sum + 1;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
logger.info(String.format("Visited bases: %d", sum));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,152 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Uses the UG engine to call variants based off of VCFs annotated with GLs (or PLs).
|
||||
* Absolutely not supported or recommended for public use.
|
||||
* Run this as you would the UnifiedGenotyper, except that instead of '-I reads' it expects any number
|
||||
* of GL/PL-annotated VCFs bound to a name starting with 'variant'.
|
||||
*/
|
||||
public class UGCallVariants extends RodWalker<VariantCallContext, Integer> {
|
||||
|
||||
@ArgumentCollection
|
||||
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||
public List<RodBinding<VariantContext>> variants;
|
||||
|
||||
// control the output
|
||||
@Output(doc="File to which variants should be written",required=true)
|
||||
protected VCFWriter writer = null;
|
||||
|
||||
// the calculation arguments
|
||||
private UnifiedGenotyperEngine UG_engine = null;
|
||||
|
||||
// variant track names
|
||||
private Set<String> trackNames = new HashSet<String>();
|
||||
|
||||
public void initialize() {
|
||||
|
||||
for ( RodBinding<VariantContext> rb : variants )
|
||||
trackNames.add(rb.getName());
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), trackNames);
|
||||
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples);
|
||||
|
||||
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"));
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING )
|
||||
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
|
||||
|
||||
// initialize the header
|
||||
writer.writeHeader(new VCFHeader(headerInfo, samples));
|
||||
}
|
||||
|
||||
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
List<VariantContext> VCs = tracker.getValues(variants, context.getLocation());
|
||||
|
||||
VariantContext mergedVC = mergeVCsWithGLs(VCs);
|
||||
if ( mergedVC == null )
|
||||
return null;
|
||||
|
||||
return UG_engine.calculateGenotypes(tracker, ref, context, mergedVC);
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(VariantCallContext value, Integer sum) {
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
try {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(value);
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
writer.add(builder.make());
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
|
||||
}
|
||||
|
||||
return sum + 1;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
logger.info(String.format("Visited sites: %d", result));
|
||||
}
|
||||
|
||||
private static VariantContext mergeVCsWithGLs(List<VariantContext> VCs) {
|
||||
// we can't use the VCUtils classes because our VCs can all be no-calls
|
||||
if ( VCs.size() == 0 )
|
||||
return null;
|
||||
|
||||
VariantContext variantVC = null;
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( variantVC == null && vc.isVariant() )
|
||||
variantVC = vc;
|
||||
genotypes.addAll(getGenotypesWithGLs(vc.getGenotypes()));
|
||||
}
|
||||
|
||||
if ( variantVC == null ) {
|
||||
VariantContext vc = VCs.get(0);
|
||||
throw new UserException("There is no ALT allele in any of the VCF records passed in at " + vc.getChr() + ":" + vc.getStart());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(variantVC).source("VCwithGLs").genotypes(genotypes).make();
|
||||
}
|
||||
|
||||
private static GenotypesContext getGenotypesWithGLs(GenotypesContext genotypes) {
|
||||
GenotypesContext genotypesWithGLs = GenotypesContext.create(genotypes.size());
|
||||
for ( final Genotype g : genotypes ) {
|
||||
if ( g.hasLikelihoods() && g.getLikelihoods().getAsVector() != null )
|
||||
genotypesWithGLs.add(g);
|
||||
}
|
||||
return genotypesWithGLs;
|
||||
}
|
||||
}
|
||||
|
|
@ -120,6 +120,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false)
|
||||
public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName="multipleAllelesMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different allele types (for example, SNP vs. indel)", required=false)
|
||||
public VariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = VariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
|
||||
|
||||
/**
|
||||
* Used when taking the union of variants that contain genotypes. A complete priority list MUST be provided.
|
||||
*/
|
||||
|
|
@ -236,13 +240,24 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
return 0;
|
||||
|
||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||
// iterate over the types so that it's deterministic
|
||||
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
||||
if ( VCsByType.containsKey(type) )
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
|
||||
if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||
// iterate over the types so that it's deterministic
|
||||
for (VariantContext.Type type : VariantContext.Type.values()) {
|
||||
if (VCsByType.containsKey(type))
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
}
|
||||
else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
|
||||
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs,
|
||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||
}
|
||||
else {
|
||||
logger.warn("Ignoring all records at site " + ref.getLocus());
|
||||
}
|
||||
|
||||
for ( VariantContext mergedVC : mergedVCs ) {
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import org.apache.commons.jexl2.Expression;
|
|||
import org.apache.commons.jexl2.JexlEngine;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -471,6 +472,18 @@ public class VariantContextUtils {
|
|||
KEEP_UNCONDITIONAL
|
||||
}
|
||||
|
||||
@Hidden
|
||||
public enum MultipleAllelesMergeType {
|
||||
/**
|
||||
* Combine only alleles of the same type (SNP, indel, etc.) into a single VCF record.
|
||||
*/
|
||||
BY_TYPE,
|
||||
/**
|
||||
* Merge all allele types at the same start position into the same VCF record.
|
||||
*/
|
||||
MIX_TYPES
|
||||
}
|
||||
|
||||
/**
|
||||
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||
|
|
|
|||
Loading…
Reference in New Issue