The UnifiedGenotyper (UG) calculations are now driven by an independent engine.
This completely separates the genotyper walker from other walkers.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2758 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d8e75cf631
commit
506d39f751
|
|
@ -1,51 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2009 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.HashSet;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class for maintaining the arguments that need to be passed to UnifiedGenotyper.runGenotyper()
|
|
||||||
* (so that they only need to be computed one time)
|
|
||||||
*/
|
|
||||||
public class UGCalculationArguments {
|
|
||||||
// should we annotate dbsnp?
|
|
||||||
protected boolean annotateDbsnp = false;
|
|
||||||
// should we annotate hapmap2?
|
|
||||||
protected boolean annotateHapmap2 = false;
|
|
||||||
// should we annotate hapmap3?
|
|
||||||
protected boolean annotateHapmap3 = false;
|
|
||||||
|
|
||||||
// the unified argument collection
|
|
||||||
protected UnifiedArgumentCollection UAC = null;
|
|
||||||
|
|
||||||
// the model used for calculating genotypes
|
|
||||||
protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
|
|
||||||
|
|
||||||
// samples in input
|
|
||||||
protected Set<String> samples = new HashSet<String>();
|
|
||||||
}
|
|
||||||
|
|
@ -26,19 +26,13 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.*;
|
import org.broadinstitute.sting.gatk.contexts.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
|
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.pileup.*;
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.*;
|
import org.broadinstitute.sting.utils.cmdLine.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.*;
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -65,7 +59,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
public PrintStream beagleWriter = null;
|
public PrintStream beagleWriter = null;
|
||||||
|
|
||||||
// the calculation arguments
|
// the calculation arguments
|
||||||
private UGCalculationArguments UG_args = null;
|
private UnifiedGenotyperEngine UG_engine = null;
|
||||||
|
|
||||||
// Enable deletions in the pileup
|
// Enable deletions in the pileup
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
@ -88,172 +82,13 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / nBasesCallable; }
|
double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / nBasesCallable; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates the argument calculation object for the UnifiedGenotyper.
|
|
||||||
*
|
|
||||||
* @param toolkit the GATK Engine
|
|
||||||
* @param UAC the UnifiedArgumentCollection
|
|
||||||
* @return UG calculation arguments object
|
|
||||||
**/
|
|
||||||
public static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
|
||||||
return getUnifiedCalculationArguments(toolkit, UAC, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates the argument calculation object for the UnifiedGenotyper.
|
|
||||||
*
|
|
||||||
* @param toolkit the GATK Engine
|
|
||||||
* @param UAC the UnifiedArgumentCollection
|
|
||||||
* @param writer the genotype writer
|
|
||||||
* @return UG calculation arguments object
|
|
||||||
*
|
|
||||||
**/
|
|
||||||
private static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, GenotypeWriter writer) {
|
|
||||||
UGCalculationArguments UG_args = new UGCalculationArguments();
|
|
||||||
UG_args.UAC = UAC;
|
|
||||||
|
|
||||||
// deal with input errors
|
|
||||||
if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
|
|
||||||
throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
|
|
||||||
}
|
|
||||||
if ( UAC.POOLSIZE < 1 && UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ) {
|
|
||||||
throw new IllegalArgumentException("Attempting to use the POOLED model with a pool size less than 1. Please set the pool size to an appropriate value.");
|
|
||||||
}
|
|
||||||
if ( toolkit.getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
|
|
||||||
// the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name
|
|
||||||
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
|
|
||||||
}
|
|
||||||
|
|
||||||
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
|
|
||||||
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
|
|
||||||
// if we're supposed to assume a single sample, do so
|
|
||||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
|
||||||
UG_args.samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
|
||||||
else
|
|
||||||
UG_args.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
|
|
||||||
}
|
|
||||||
|
|
||||||
// in pooled mode we need to check that the format is acceptable
|
|
||||||
if ( UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED && writer != null ) {
|
|
||||||
// only multi-sample calls use Variations
|
|
||||||
if ( !writer.supportsMultiSample() )
|
|
||||||
throw new IllegalArgumentException("The POOLED model is not compatible with the specified format; try using VCF instead");
|
|
||||||
|
|
||||||
// when using VCF with multiple threads, we need to turn down the validation stringency so that writing temporary files will work
|
|
||||||
if ( toolkit.getArguments().numberOfThreads > 1 && writer instanceof VCFGenotypeWriter )
|
|
||||||
((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
|
|
||||||
}
|
|
||||||
|
|
||||||
// check to see whether a dbsnp rod was included
|
|
||||||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
|
||||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
|
||||||
ReferenceOrderedData rod = source.getReferenceOrderedData();
|
|
||||||
if ( rod.getType().equals(rodDbSNP.class) ) {
|
|
||||||
UG_args.annotateDbsnp = true;
|
|
||||||
}
|
|
||||||
if ( rod.getName().equals("hapmap2") ) {
|
|
||||||
UG_args.annotateHapmap2 = true;
|
|
||||||
}
|
|
||||||
if ( rod.getName().equals("hapmap3") ) {
|
|
||||||
UG_args.annotateHapmap3 = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return UG_args;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute at a given locus.
|
|
||||||
*
|
|
||||||
* @param tracker the meta data tracker
|
|
||||||
* @param refContext the reference base
|
|
||||||
* @param rawContext contextual information around the locus
|
|
||||||
* @param UG_args the calculation argument collection
|
|
||||||
* @return the VariantCallContext object
|
|
||||||
*/
|
|
||||||
public static VariantCallContext runGenotyper(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, UGCalculationArguments UG_args) {
|
|
||||||
|
|
||||||
char ref = Character.toUpperCase(refContext.getBase());
|
|
||||||
if ( !BaseUtils.isRegularBase(ref) )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
// don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
|
|
||||||
if ( rawContext.hasExceededMaxPileup() )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
|
||||||
|
|
||||||
// filter the context based on min base and mapping qualities
|
|
||||||
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UG_args.UAC.MIN_BASE_QUALTY_SCORE, UG_args.UAC.MIN_MAPPING_QUALTY_SCORE);
|
|
||||||
|
|
||||||
// filter the context based on mapping quality and mismatch rate
|
|
||||||
pileup = filterPileup(pileup, refContext, UG_args.UAC);
|
|
||||||
|
|
||||||
// don't call when there is no coverage
|
|
||||||
if ( pileup.size() == 0 )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
// are there too many deletions in the pileup?
|
|
||||||
if ( isValidDeletionFraction(UG_args.UAC.MAX_DELETION_FRACTION) &&
|
|
||||||
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UG_args.UAC.MAX_DELETION_FRACTION )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
// stratify the AlignmentContext and cut by sample
|
|
||||||
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
|
|
||||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UG_args.UAC.ASSUME_SINGLE_SAMPLE, (UG_args.UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
|
||||||
if ( stratifiedContexts == null )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UG_args.UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
|
|
||||||
VariantCallContext call = UG_args.gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
|
|
||||||
|
|
||||||
// annotate the call, if possible
|
|
||||||
if ( call != null && call.variation != null && call.variation instanceof ArbitraryFieldsBacked ) {
|
|
||||||
// first off, we want to use the *unfiltered* context for the annotations
|
|
||||||
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
|
|
||||||
|
|
||||||
Map<String, String> annotations;
|
|
||||||
if ( UG_args.UAC.ALL_ANNOTATIONS )
|
|
||||||
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
|
|
||||||
else
|
|
||||||
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
|
|
||||||
((ArbitraryFieldsBacked)call.variation).setFields(annotations);
|
|
||||||
}
|
|
||||||
|
|
||||||
return call;
|
|
||||||
}
|
|
||||||
|
|
||||||
// filter based on maximum mismatches and bad mates
|
|
||||||
private static ReadBackedPileup filterPileup(ReadBackedPileup pileup, ReferenceContext refContext, UnifiedArgumentCollection UAC) {
|
|
||||||
|
|
||||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
|
||||||
for ( PileupElement p : pileup ) {
|
|
||||||
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
|
|
||||||
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
|
|
||||||
filteredPileup.add(p);
|
|
||||||
}
|
|
||||||
return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean isValidDeletionFraction(double d) {
|
|
||||||
return ( d >= 0.0 && d <= 1.0 );
|
|
||||||
}
|
|
||||||
|
|
||||||
// ------------------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// The following methods are walker-specific; don't use them unless you are a traversal.
|
|
||||||
// If you are a walker, stick to the static methods defined above.
|
|
||||||
//
|
|
||||||
// ------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initialize the samples, output, and genotype calculation model
|
* Initialize the samples, output, and genotype calculation model
|
||||||
*
|
*
|
||||||
**/
|
**/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
UG_args = getUnifiedCalculationArguments(getToolkit(), UAC, writer);
|
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, writer, verboseWriter, beagleWriter);
|
||||||
|
|
||||||
// initialize the writers
|
// initialize the writers
|
||||||
if ( verboseWriter != null ) {
|
if ( verboseWriter != null ) {
|
||||||
|
|
@ -267,16 +102,16 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
}
|
}
|
||||||
if ( beagleWriter != null ) {
|
if ( beagleWriter != null ) {
|
||||||
beagleWriter.print("marker alleleA alleleB");
|
beagleWriter.print("marker alleleA alleleB");
|
||||||
for ( String sample : UG_args.samples )
|
for ( String sample : UG_engine.samples )
|
||||||
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
|
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
|
||||||
beagleWriter.println();
|
beagleWriter.println();
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize the header
|
// initialize the header
|
||||||
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), UG_args.samples, getHeaderInfo(UG_args));
|
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), UG_engine.samples, getHeaderInfo());
|
||||||
}
|
}
|
||||||
|
|
||||||
private Set<VCFHeaderLine> getHeaderInfo(UGCalculationArguments UG_args) {
|
private Set<VCFHeaderLine> getHeaderInfo() {
|
||||||
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
|
||||||
|
|
||||||
// this is only applicable to VCF
|
// this is only applicable to VCF
|
||||||
|
|
@ -295,11 +130,11 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
|
|
||||||
// annotation (INFO) fields from UnifiedGenotyper
|
// annotation (INFO) fields from UnifiedGenotyper
|
||||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
|
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
|
||||||
if ( UG_args.annotateDbsnp )
|
if ( UG_engine.annotateDbsnp )
|
||||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP Membership"));
|
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP Membership"));
|
||||||
if ( UG_args.annotateHapmap2 )
|
if ( UG_engine.annotateHapmap2 )
|
||||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap2 Membership"));
|
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap2 Membership"));
|
||||||
if ( UG_args.annotateHapmap3 )
|
if ( UG_engine.annotateHapmap3 )
|
||||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap3 Membership"));
|
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap3 Membership"));
|
||||||
if ( !UAC.NO_SLOD )
|
if ( !UAC.NO_SLOD )
|
||||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
|
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
|
||||||
|
|
@ -331,24 +166,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
* @return the VariantCallContext object
|
* @return the VariantCallContext object
|
||||||
*/
|
*/
|
||||||
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||||
|
return UG_engine.runGenotyper(tracker, refContext, rawContext);
|
||||||
// initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
|
|
||||||
if ( UG_args.gcm.get() == null ) {
|
|
||||||
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
|
||||||
if ( writer != null ) {
|
|
||||||
if ( writer instanceof VCFGenotypeWriter )
|
|
||||||
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
|
||||||
else if ( writer instanceof GLFGenotypeWriter )
|
|
||||||
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
|
|
||||||
else if ( writer instanceof GeliGenotypeWriter )
|
|
||||||
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
|
||||||
else
|
|
||||||
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
|
|
||||||
}
|
|
||||||
UG_args.gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(UG_args.samples, logger, UAC, format, verboseWriter, beagleWriter));
|
|
||||||
}
|
|
||||||
|
|
||||||
return runGenotyper(tracker, refContext, rawContext, UG_args);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public UGStatistics reduceInit() { return new UGStatistics(); }
|
public UGStatistics reduceInit() { return new UGStatistics(); }
|
||||||
|
|
@ -381,7 +199,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we have a single-sample call (single sample from PointEstimate model returns no VariationCall data)
|
// if we have a single-sample call (single sample from PointEstimate model returns no VariationCall data)
|
||||||
if ( value.variation == null || (!writer.supportsMultiSample() && UG_args.samples.size() <= 1) ) {
|
if ( value.variation == null || (!writer.supportsMultiSample() && UG_engine.samples.size() <= 1) ) {
|
||||||
writer.addGenotypeCall(value.genotypes.get(0));
|
writer.addGenotypeCall(value.genotypes.get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,227 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.*;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
|
||||||
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.*;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
|
||||||
|
public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
|
// should we annotate dbsnp?
|
||||||
|
protected boolean annotateDbsnp = false;
|
||||||
|
// should we annotate hapmap2?
|
||||||
|
protected boolean annotateHapmap2 = false;
|
||||||
|
// should we annotate hapmap3?
|
||||||
|
protected boolean annotateHapmap3 = false;
|
||||||
|
|
||||||
|
// the unified argument collection
|
||||||
|
protected UnifiedArgumentCollection UAC = null;
|
||||||
|
|
||||||
|
// the model used for calculating genotypes
|
||||||
|
protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
|
||||||
|
|
||||||
|
// the various loggers and writers
|
||||||
|
protected Logger logger = null;
|
||||||
|
protected GenotypeWriter genotypeWriter = null;
|
||||||
|
protected PrintStream verboseWriter = null;
|
||||||
|
protected PrintStream beagleWriter = null;
|
||||||
|
|
||||||
|
// samples in input
|
||||||
|
protected Set<String> samples = new HashSet<String>();
|
||||||
|
|
||||||
|
|
||||||
|
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||||
|
initialize(toolkit, UAC, null, null, null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter) {
|
||||||
|
initialize(toolkit, UAC, logger, genotypeWriter, verboseWriter, beagleWriter);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter) {
|
||||||
|
this.UAC = UAC;
|
||||||
|
this.logger = logger;
|
||||||
|
this.genotypeWriter = genotypeWriter;
|
||||||
|
this.verboseWriter = verboseWriter;
|
||||||
|
this.beagleWriter = beagleWriter;
|
||||||
|
|
||||||
|
// deal with input errors
|
||||||
|
if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
|
||||||
|
throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
|
||||||
|
}
|
||||||
|
if ( UAC.POOLSIZE < 1 && UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ) {
|
||||||
|
throw new IllegalArgumentException("Attempting to use the POOLED model with a pool size less than 1. Please set the pool size to an appropriate value.");
|
||||||
|
}
|
||||||
|
if ( toolkit.getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
|
||||||
|
// the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name
|
||||||
|
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
|
||||||
|
}
|
||||||
|
|
||||||
|
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
|
||||||
|
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
|
||||||
|
// if we're supposed to assume a single sample, do so
|
||||||
|
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||||
|
this.samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||||
|
else
|
||||||
|
this.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
|
||||||
|
}
|
||||||
|
|
||||||
|
// in pooled mode we need to check that the format is acceptable
|
||||||
|
if ( UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED && genotypeWriter != null ) {
|
||||||
|
// only multi-sample calls use Variations
|
||||||
|
if ( !genotypeWriter.supportsMultiSample() )
|
||||||
|
throw new IllegalArgumentException("The POOLED model is not compatible with the specified format; try using VCF instead");
|
||||||
|
|
||||||
|
// when using VCF with multiple threads, we need to turn down the validation stringency so that writing temporary files will work
|
||||||
|
if ( toolkit.getArguments().numberOfThreads > 1 && genotypeWriter instanceof VCFGenotypeWriter )
|
||||||
|
((VCFGenotypeWriter)genotypeWriter).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
// check to see whether a dbsnp rod was included
|
||||||
|
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||||
|
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||||
|
ReferenceOrderedData rod = source.getReferenceOrderedData();
|
||||||
|
if ( rod.getType().equals(rodDbSNP.class) ) {
|
||||||
|
this.annotateDbsnp = true;
|
||||||
|
}
|
||||||
|
if ( rod.getName().equals("hapmap2") ) {
|
||||||
|
this.annotateHapmap2 = true;
|
||||||
|
}
|
||||||
|
if ( rod.getName().equals("hapmap3") ) {
|
||||||
|
this.annotateHapmap3 = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute at a given locus.
|
||||||
|
*
|
||||||
|
* @param tracker the meta data tracker
|
||||||
|
* @param refContext the reference base
|
||||||
|
* @param rawContext contextual information around the locus
|
||||||
|
* @return the VariantCallContext object
|
||||||
|
*/
|
||||||
|
public VariantCallContext runGenotyper(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||||
|
|
||||||
|
// initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
|
||||||
|
if ( gcm.get() == null ) {
|
||||||
|
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||||
|
if ( genotypeWriter != null ) {
|
||||||
|
if ( genotypeWriter instanceof VCFGenotypeWriter )
|
||||||
|
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||||
|
else if ( genotypeWriter instanceof GLFGenotypeWriter)
|
||||||
|
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
|
||||||
|
else if ( genotypeWriter instanceof GeliGenotypeWriter)
|
||||||
|
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
||||||
|
else
|
||||||
|
throw new StingException("Unsupported genotype format: " + genotypeWriter.getClass().getName());
|
||||||
|
}
|
||||||
|
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
|
||||||
|
}
|
||||||
|
|
||||||
|
char ref = Character.toUpperCase(refContext.getBase());
|
||||||
|
if ( !BaseUtils.isRegularBase(ref) )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
// don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
|
||||||
|
if ( rawContext.hasExceededMaxPileup() )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
||||||
|
|
||||||
|
// filter the context based on min base and mapping qualities
|
||||||
|
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||||
|
|
||||||
|
// filter the context based on mapping quality and mismatch rate
|
||||||
|
pileup = filterPileup(pileup, refContext);
|
||||||
|
|
||||||
|
// don't call when there is no coverage
|
||||||
|
if ( pileup.size() == 0 )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
// are there too many deletions in the pileup?
|
||||||
|
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
|
||||||
|
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
// stratify the AlignmentContext and cut by sample
|
||||||
|
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
|
||||||
|
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
||||||
|
if ( stratifiedContexts == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
|
||||||
|
VariantCallContext call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
|
||||||
|
|
||||||
|
// annotate the call, if possible
|
||||||
|
if ( call != null && call.variation != null && call.variation instanceof ArbitraryFieldsBacked ) {
|
||||||
|
// first off, we want to use the *unfiltered* context for the annotations
|
||||||
|
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
|
||||||
|
|
||||||
|
Map<String, String> annotations;
|
||||||
|
if ( UAC.ALL_ANNOTATIONS )
|
||||||
|
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||||
|
else
|
||||||
|
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||||
|
((ArbitraryFieldsBacked)call.variation).setFields(annotations);
|
||||||
|
}
|
||||||
|
|
||||||
|
return call;
|
||||||
|
}
|
||||||
|
|
||||||
|
// filter based on maximum mismatches and bad mates
|
||||||
|
private ReadBackedPileup filterPileup(ReadBackedPileup pileup, ReferenceContext refContext) {
|
||||||
|
|
||||||
|
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||||
|
for ( PileupElement p : pileup ) {
|
||||||
|
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
|
||||||
|
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
|
||||||
|
filteredPileup.add(p);
|
||||||
|
}
|
||||||
|
return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isValidDeletionFraction(double d) {
|
||||||
|
return ( d >= 0.0 && d <= 1.0 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -49,7 +49,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
||||||
@Argument(fullName="forcePreviousReadBasesToMatchRef", doc="Forces previous read bases to match the reference", required = false)
|
@Argument(fullName="forcePreviousReadBasesToMatchRef", doc="Forces previous read bases to match the reference", required = false)
|
||||||
boolean readBasesMustMatchRef = false;
|
boolean readBasesMustMatchRef = false;
|
||||||
|
|
||||||
private UGCalculationArguments ug;
|
private UnifiedGenotyperEngine ug;
|
||||||
// private ReferenceContextWindow refWindow;
|
// private ReferenceContextWindow refWindow;
|
||||||
// private Set<BaseTransitionTable> conditionalTables;
|
// private Set<BaseTransitionTable> conditionalTables;
|
||||||
private List<Boolean> usePreviousBases;
|
private List<Boolean> usePreviousBases;
|
||||||
|
|
@ -62,7 +62,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
||||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||||
uac.baseModel = BaseMismatchModel.THREE_STATE;
|
uac.baseModel = BaseMismatchModel.THREE_STATE;
|
||||||
uac.ALL_BASES = true;
|
uac.ALL_BASES = true;
|
||||||
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
|
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||||
// refWindow = new ReferenceContextWindow(nPreviousBases);
|
// refWindow = new ReferenceContextWindow(nPreviousBases);
|
||||||
usePreviousBases = new ArrayList<Boolean>();
|
usePreviousBases = new ArrayList<Boolean>();
|
||||||
previousBaseLoci = new ArrayList<GenomeLoc>();
|
previousBaseLoci = new ArrayList<GenomeLoc>();
|
||||||
|
|
@ -359,7 +359,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
||||||
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||||
if ( !BaseUtils.isRegularBase(ref.getBase()) )
|
if ( !BaseUtils.isRegularBase(ref.getBase()) )
|
||||||
return false;
|
return false;
|
||||||
VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
|
VariantCallContext calls = ug.runGenotyper(tracker,ref,context);
|
||||||
if ( calls == null || calls.genotypes == null)
|
if ( calls == null || calls.genotypes == null)
|
||||||
return false;
|
return false;
|
||||||
return ( calls.genotypes.size() > 0 && !calls.genotypes.get(0).isVariant(ref.getBase()) && calls.genotypes.get(0).getNegLog10PError() > confidentRefThreshold );
|
return ( calls.genotypes.size() > 0 && !calls.genotypes.get(0).isVariant(ref.getBase()) && calls.genotypes.get(0).getNegLog10PError() > confidentRefThreshold );
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.genotype.*;
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Walker to calculate the number of mismatches, their base counts, and their quality sums at confidence ref sites"
|
* Walker to calculate the number of mismatches, their base counts, and their quality sums at confidence ref sites"
|
||||||
|
|
@ -39,13 +38,13 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
|
||||||
@Argument(fullName="skip", doc = "Only display every skip eligable sites. Defaults to all sites", required = false)
|
@Argument(fullName="skip", doc = "Only display every skip eligable sites. Defaults to all sites", required = false)
|
||||||
int skip = 1;
|
int skip = 1;
|
||||||
|
|
||||||
private UGCalculationArguments ug;
|
private UnifiedGenotyperEngine ug;
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||||
uac.baseModel = BaseMismatchModel.THREE_STATE;
|
uac.baseModel = BaseMismatchModel.THREE_STATE;
|
||||||
uac.ALL_BASES = true;
|
uac.ALL_BASES = true;
|
||||||
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
|
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||||
|
|
||||||
// print the header
|
// print the header
|
||||||
out.printf("loc ref genotype genotypeQ depth nMM qSumMM A C G T%n");
|
out.printf("loc ref genotype genotypeQ depth nMM qSumMM A C G T%n");
|
||||||
|
|
@ -54,7 +53,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
|
||||||
public String map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
public String map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||||
String result = null;
|
String result = null;
|
||||||
|
|
||||||
ReadBackedPileup pileup = context.getPileup();
|
ReadBackedPileup pileup = context.getBasePileup();
|
||||||
if ( locusIsUsable(tracker, ref, pileup, context) ) {
|
if ( locusIsUsable(tracker, ref, pileup, context) ) {
|
||||||
Genotype g = getGenotype(tracker, ref, context);
|
Genotype g = getGenotype(tracker, ref, context);
|
||||||
if ( g != null && g.isPointGenotype() )
|
if ( g != null && g.isPointGenotype() )
|
||||||
|
|
@ -163,7 +162,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
|
||||||
}
|
}
|
||||||
|
|
||||||
private Genotype getGenotype( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
private Genotype getGenotype( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||||
VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context, ug);
|
VariantCallContext calls = ug.runGenotyper(tracker,ref,context);
|
||||||
if ( calls == null || calls.variation == null || calls.genotypes == null )
|
if ( calls == null || calls.variation == null || calls.genotypes == null )
|
||||||
return null;
|
return null;
|
||||||
else {
|
else {
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,6 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.VariationCall;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
|
@ -16,8 +12,6 @@ import org.broadinstitute.sting.playground.utils.NamedTable;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import cern.jet.stat.Probability;
|
import cern.jet.stat.Probability;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -34,13 +28,13 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
||||||
@Argument(fullName="limit", shortName="lim", doc="The pValue limit for which a read group will be deemed to be a contaminant", required=false)
|
@Argument(fullName="limit", shortName="lim", doc="The pValue limit for which a read group will be deemed to be a contaminant", required=false)
|
||||||
private Double LIMIT = 1e-9;
|
private Double LIMIT = 1e-9;
|
||||||
|
|
||||||
private UGCalculationArguments ug;
|
private UnifiedGenotyperEngine ug;
|
||||||
private NamedTable altTable;
|
private NamedTable altTable;
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||||
uac.CONFIDENCE_THRESHOLD = 50;
|
uac.CONFIDENCE_THRESHOLD = 50;
|
||||||
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
|
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||||
|
|
||||||
altTable = new NamedTable();
|
altTable = new NamedTable();
|
||||||
}
|
}
|
||||||
|
|
@ -58,7 +52,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
||||||
int altCount = 0;
|
int altCount = 0;
|
||||||
int totalCount = 0;
|
int totalCount = 0;
|
||||||
|
|
||||||
ReadBackedPileup pileup = context.getPileup();
|
ReadBackedPileup pileup = context.getBasePileup();
|
||||||
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
|
||||||
|
|
||||||
for (byte base : pileup.getBases() ) {
|
for (byte base : pileup.getBases() ) {
|
||||||
|
|
@ -73,7 +67,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
||||||
double altBalance = ((double) altCount)/((double) totalCount);
|
double altBalance = ((double) altCount)/((double) totalCount);
|
||||||
|
|
||||||
if (altBalance > 0.70) {
|
if (altBalance > 0.70) {
|
||||||
VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker, ref, context, ug);
|
VariantCallContext ugResult = ug.runGenotyper(tracker, ref, context);
|
||||||
|
|
||||||
if (ugResult != null && ugResult.genotypes != null && ugResult.genotypes.size() > 0) {
|
if (ugResult != null && ugResult.genotypes != null && ugResult.genotypes.size() > 0) {
|
||||||
return ugResult.genotypes.get(0).isHet();
|
return ugResult.genotypes.get(0).isHet();
|
||||||
|
|
|
||||||
|
|
@ -26,14 +26,14 @@ import java.util.HashMap;
|
||||||
public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Integer> {
|
public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
HashMap<String,Long> counts = new HashMap<String,Long>();
|
HashMap<String,Long> counts = new HashMap<String,Long>();
|
||||||
private UGCalculationArguments ug;
|
private UnifiedGenotyperEngine ug;
|
||||||
private NamedTable altTable;
|
private NamedTable altTable;
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||||
uac.CONFIDENCE_THRESHOLD = 50;
|
uac.CONFIDENCE_THRESHOLD = 50;
|
||||||
uac.ALL_BASES = true;
|
uac.ALL_BASES = true;
|
||||||
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
|
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
|
||||||
|
|
||||||
altTable = new NamedTable();
|
altTable = new NamedTable();
|
||||||
}
|
}
|
||||||
|
|
@ -46,7 +46,7 @@ public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Int
|
||||||
char nextBase = Character.toUpperCase(contextBases[contextBases.length - 1]);
|
char nextBase = Character.toUpperCase(contextBases[contextBases.length - 1]);
|
||||||
|
|
||||||
if (contextBases.length == 3 && refBase != 'N' && pileup.getBases() != null && pileup.getSecondaryBases() != null) {
|
if (contextBases.length == 3 && refBase != 'N' && pileup.getBases() != null && pileup.getSecondaryBases() != null) {
|
||||||
VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
|
VariantCallContext ugResult = ug.runGenotyper(tracker,ref,context);
|
||||||
if (ugResult != null && ugResult.variation != null) {
|
if (ugResult != null && ugResult.variation != null) {
|
||||||
Genotype res = ugResult.genotypes.get(0);
|
Genotype res = ugResult.genotypes.get(0);
|
||||||
String call = res.getBases();
|
String call = res.getBases();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue