UG is now officially in the business of making good SNP calls (as opposed to being hyper-aggressive in its calls and expecting the end-user to filter).
Bad/suspicious bases/reads (high mismatch rate, low MQ, low BQ, bad mates) are now filtered out by default (and not used for the annotations either), although this can all be turned off. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2373 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
af440943a4
commit
bb312814a2
|
|
@ -44,11 +44,10 @@ public class StratifiedAlignmentContext {
|
|||
|
||||
// Definitions:
|
||||
// COMPLETE = full alignment context
|
||||
// MQ0FREE = full context without MQ0 reads
|
||||
// FORWARD = reads on forward strand (*no* MQ0 reads)
|
||||
// REVERSE = reads on forward strand (*no* MQ0 reads)
|
||||
// FORWARD = reads on forward strand
|
||||
// REVERSE = reads on forward strand
|
||||
//
|
||||
public enum StratifiedContextType { COMPLETE, MQ0FREE, FORWARD, REVERSE }
|
||||
public enum StratifiedContextType { COMPLETE, FORWARD, REVERSE }
|
||||
|
||||
private GenomeLoc loc;
|
||||
private AlignmentContext[] contexts = new AlignmentContext[StratifiedContextType.values().length];
|
||||
|
|
@ -72,16 +71,12 @@ public class StratifiedAlignmentContext {
|
|||
}
|
||||
|
||||
public void add(SAMRecord read, int offset) {
|
||||
if ( read.getMappingQuality() > 0 ) {
|
||||
reads[StratifiedContextType.MQ0FREE.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.MQ0FREE.ordinal()].add(offset);
|
||||
if ( read.getReadNegativeStrandFlag() ) {
|
||||
reads[StratifiedContextType.REVERSE.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.REVERSE.ordinal()].add(offset);
|
||||
} else {
|
||||
reads[StratifiedContextType.FORWARD.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.FORWARD.ordinal()].add(offset);
|
||||
}
|
||||
if ( read.getReadNegativeStrandFlag() ) {
|
||||
reads[StratifiedContextType.REVERSE.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.REVERSE.ordinal()].add(offset);
|
||||
} else {
|
||||
reads[StratifiedContextType.FORWARD.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.FORWARD.ordinal()].add(offset);
|
||||
}
|
||||
reads[StratifiedContextType.COMPLETE.ordinal()].add(read);
|
||||
offsets[StratifiedContextType.COMPLETE.ordinal()].add(offset);
|
||||
|
|
@ -90,18 +85,17 @@ public class StratifiedAlignmentContext {
|
|||
/**
|
||||
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample.
|
||||
*
|
||||
* @param context the original AlignmentContext
|
||||
* @param pileup the original pileup
|
||||
* @param assumedSingleSample if not null, any read without a readgroup will be given this sample name
|
||||
* @param collapseToThisSample if not null, all reads will be assigned this read group regardless of their actual read group
|
||||
*
|
||||
* @return a Map of sample name to StratifiedAlignmentContext
|
||||
*
|
||||
**/
|
||||
public static Map<String, StratifiedAlignmentContext> splitContextBySample(AlignmentContext context, String assumedSingleSample, String collapseToThisSample) {
|
||||
public static Map<String, StratifiedAlignmentContext> splitContextBySample(ReadBackedPileup pileup, String assumedSingleSample, String collapseToThisSample) {
|
||||
|
||||
HashMap<String, StratifiedAlignmentContext> contexts = new HashMap<String, StratifiedAlignmentContext>();
|
||||
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
for (PileupElement p : pileup ) {
|
||||
|
||||
// get the read
|
||||
|
|
@ -125,7 +119,7 @@ public class StratifiedAlignmentContext {
|
|||
// create a new context object if this is the first time we're seeing a read for this sample
|
||||
StratifiedAlignmentContext myContext = contexts.get(sample);
|
||||
if ( myContext == null ) {
|
||||
myContext = new StratifiedAlignmentContext(context.getLocation());
|
||||
myContext = new StratifiedAlignmentContext(pileup.getLocation());
|
||||
contexts.put(sample, myContext);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ public class AlleleBalance extends StandardVariantAnnotation {
|
|||
if ( genotypeStr.length() != 2 )
|
||||
return null;
|
||||
|
||||
final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup().getBases()).toUpperCase();
|
||||
final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup().getBases()).toUpperCase();
|
||||
if ( bases.length() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -20,5 +20,5 @@ public class DepthOfCoverage extends StandardVariantAnnotation {
|
|||
|
||||
public String getKeyName() { return VCFRecord.DEPTH_KEY; }
|
||||
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total Depth (including MQ0 reads)"); }
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total Depth"); }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,35 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class MismatchRate implements VariantAnnotation {
|
||||
|
||||
public String annotate(ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, Variation variation) {
|
||||
int mismatches = 0;
|
||||
int totalBases = 0;
|
||||
for ( String sample : stratifiedContexts.keySet() ) {
|
||||
ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
|
||||
Pair<Integer, Integer> counts = AlignmentUtils.mismatchesInRefWindow(pileup, ref, true);
|
||||
mismatches += counts.first;
|
||||
totalBases += counts.second;
|
||||
}
|
||||
|
||||
// sanity check
|
||||
if ( totalBases == 0 )
|
||||
return null;
|
||||
|
||||
return String.format("%.2f", (double)mismatches/(double)totalBases);
|
||||
}
|
||||
|
||||
public String getKeyName() { return "MR"; }
|
||||
|
||||
public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("MR", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Mismatch Rate of Reads Spanning This Position"); }
|
||||
}
|
||||
|
|
@ -36,7 +36,7 @@ public class RankSumTest implements VariantAnnotation {
|
|||
if ( context == null )
|
||||
continue;
|
||||
|
||||
fillQualsFromPileup(ref.getBase(), variation.getAlternativeBaseForSNP(), context.getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(), refQuals, altQuals);
|
||||
fillQualsFromPileup(ref.getBase(), variation.getAlternativeBaseForSNP(), context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(), refQuals, altQuals);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ public class SecondBaseSkew implements VariantAnnotation {
|
|||
|
||||
Pair<Integer, Integer> depth = new Pair<Integer, Integer>(0, 0);
|
||||
for ( String sample : stratifiedContexts.keySet() ) {
|
||||
Pair<Integer, Integer> sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(), snp);
|
||||
Pair<Integer, Integer> sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(), snp);
|
||||
depth.first += sampleDepth.first;
|
||||
depth.second += sampleDepth.second;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ public class SpanningDeletions extends StandardVariantAnnotation {
|
|||
int deletions = 0;
|
||||
int depth = 0;
|
||||
for ( String sample : stratifiedContexts.keySet() ) {
|
||||
ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
|
||||
ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
||||
deletions += pileup.getNumberOfDeletions();
|
||||
depth += pileup.size();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -165,7 +165,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 &&
|
||||
variant.isBiallelic() &&
|
||||
variant.isSNP() ) {
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context, null, null);
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getPileup(), null, null);
|
||||
if ( stratifiedContexts != null )
|
||||
annotations = getAnnotations(ref, stratifiedContexts, variant, requestedAnnotations);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
public Pair<VariationCall, List<Genotype>> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
|
||||
|
||||
// run the EM calculation
|
||||
EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
|
||||
double PofD = Math.pow(10, overall.getPofD());
|
||||
double PofNull = Math.pow(10, overall.getPofNull());
|
||||
|
|
@ -93,7 +93,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
for ( String sample : GLs.keySet() ) {
|
||||
|
||||
// create the call
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation());
|
||||
|
||||
// set the genotype and confidence
|
||||
|
|
@ -105,7 +105,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
call.setGenotype(bestGenotype);
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup();
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
|
|
|
|||
|
|
@ -55,8 +55,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
|
||||
initializeAlleleFrequencies(frequencyEstimationPoints);
|
||||
|
||||
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
calculatePofFs(ref, frequencyEstimationPoints);
|
||||
|
||||
// print out stats if we have a writer
|
||||
|
|
@ -74,7 +74,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
int[] qualCounts = new int[4];
|
||||
|
||||
for ( String sample : contexts.keySet() ) {
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
|
||||
// calculate the sum of quality scores for each base
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
return null;
|
||||
|
||||
// get the genotype likelihoods
|
||||
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE);
|
||||
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
|
||||
// find the index of the best genotype
|
||||
double[] normPosteriors = discoveryGL.second.getNormalizedPosteriors();
|
||||
|
|
|
|||
|
|
@ -88,9 +88,15 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false)
|
||||
public double MINIMUM_ALLELE_FREQUENCY = 1e-8;
|
||||
|
||||
@Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
|
||||
public Integer MIN_BASE_QUALTY_SCORE = -1;
|
||||
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
|
||||
public int MIN_BASE_QUALTY_SCORE = 20;
|
||||
|
||||
@Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
||||
public Integer MIN_MAPPING_QUALTY_SCORE = -1;
|
||||
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling (MQ0 reads are ignored regardless)", required = false)
|
||||
public int MIN_MAPPING_QUALTY_SCORE = 30;
|
||||
|
||||
@Argument(fullName = "max_mismatches_in_40bp_window", shortName = "mm40", doc = "Maximum number of mismatches within a 40 bp window (20bp on either side) around the target position for a read to be used for calling", required = false)
|
||||
public int MAX_MISMATCHES = 3;
|
||||
|
||||
@Argument(fullName = "use_reads_with_bad_mates", shortName = "bad_mates", doc = "Use reads whose mates are mapped excessively far away for calling", required = false)
|
||||
public boolean USE_BADLY_MATED_READS = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,19 +26,16 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
|
||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
|
|
@ -51,6 +48,7 @@ import java.util.*;
|
|||
* multi-sample, and pooled data. The user can choose from several different incorporated calculation models.
|
||||
*/
|
||||
@Reference(window=@Window(start=-20,stop=20))
|
||||
@ReadFilters({ZeroMappingQualityReadFilter.class})
|
||||
public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genotype>>, Integer> {
|
||||
|
||||
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
|
|
@ -174,6 +172,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
|
||||
|
||||
// annotation (INFO) fields from UnifiedGenotyper
|
||||
headerInfo.add(new VCFHeaderLine("INFO_NOTE", "\"All annotations in the INFO field are generated only from the FILTERED context used for calling variants\""));
|
||||
headerInfo.add(new VCFInfoHeaderLine("AF", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
|
||||
headerInfo.add(new VCFInfoHeaderLine("NS", 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Number of Samples With Data"));
|
||||
if ( !UAC.NO_SLOD )
|
||||
|
|
@ -203,26 +202,30 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
if ( !BaseUtils.isRegularBase(ref) )
|
||||
return null;
|
||||
|
||||
// filter the context based on min base and mapping qualities
|
||||
ReadBackedPileup pileup = rawContext.getPileup().getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
|
||||
// filter the context based on mismatches and reads with bad mates
|
||||
pileup = filterPileup(pileup, refContext, UAC.MAX_MISMATCHES, UAC.USE_BADLY_MATED_READS);
|
||||
|
||||
// an optimization to speed things up when there is no coverage or when overly covered
|
||||
if ( rawContext.getPileup().size() == 0 ||
|
||||
(UAC.MAX_READS_IN_PILEUP > 0 && rawContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
|
||||
if ( pileup.size() == 0 ||
|
||||
(UAC.MAX_READS_IN_PILEUP > 0 && pileup.size() > UAC.MAX_READS_IN_PILEUP) )
|
||||
return null;
|
||||
|
||||
// are there too many deletions in the pileup?
|
||||
ReadBackedPileup pileup = rawContext.getPileup().getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
AlignmentContext context = new AlignmentContext(rawContext.getLocation(), pileup);
|
||||
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
|
||||
(double)pileup.getPileupWithoutMappingQualityZeroReads().getNumberOfDeletions() / (double)(pileup.size() - pileup.getNumberOfMappingQualityZeroReads()) > UAC.MAX_DELETION_FRACTION )
|
||||
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
|
||||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
||||
if ( stratifiedContexts == null )
|
||||
return null;
|
||||
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, context.getLocation(), stratifiedContexts, priors);
|
||||
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
|
||||
|
||||
// annotate the call, if possible
|
||||
if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) {
|
||||
|
|
@ -237,6 +240,18 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
return call;
|
||||
}
|
||||
|
||||
// filter based on maximum mismatches and bad mates
|
||||
private static ReadBackedPileup filterPileup(ReadBackedPileup pileup, ReferenceContext refContext, int maxMismatches, boolean useBadMates) {
|
||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( (useBadMates || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
|
||||
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= maxMismatches )
|
||||
filteredPileup.add(p);
|
||||
}
|
||||
return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
|
||||
|
||||
}
|
||||
|
||||
private static boolean isValidDeletionFraction(double d) {
|
||||
return ( d >= 0.0 && d <= 1.0 );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -118,7 +118,7 @@ public class AlignmentUtils {
|
|||
* @param refSeq chunk of reference sequence that subsumes the alignment completely (if alignment runs out of
|
||||
* the reference string, IndexOutOfBound exception will be thrown at runtime).
|
||||
* @param refIndex zero-based position, at which the alignment starts on the specified reference string.
|
||||
* @return
|
||||
* @return the number of mismatches
|
||||
*/
|
||||
public static int numMismatches(SAMRecord r, String refSeq, int refIndex) {
|
||||
int readIdx = 0;
|
||||
|
|
@ -160,15 +160,12 @@ public class AlignmentUtils {
|
|||
* @param pileup the pileup with reads
|
||||
* @param ref the reference context
|
||||
* @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
|
||||
* @return a pair where the first value is the mismatches and the second is the total bases within the context
|
||||
* @return the number of mismatches
|
||||
*/
|
||||
public static Pair<Integer, Integer> mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) {
|
||||
Pair<Integer, Integer> mismatches = new Pair<Integer, Integer>(0, 0);
|
||||
for ( PileupElement p : pileup ) {
|
||||
Pair<Integer, Integer> mm = mismatchesInRefWindow(p, ref, ignoreTargetSite);
|
||||
mismatches.first += mm.first;
|
||||
mismatches.second += mm.second;
|
||||
}
|
||||
public static int mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) {
|
||||
int mismatches = 0;
|
||||
for ( PileupElement p : pileup )
|
||||
mismatches += mismatchesInRefWindow(p, ref, ignoreTargetSite);
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
|
|
@ -177,12 +174,11 @@ public class AlignmentUtils {
|
|||
* @param p the pileup element
|
||||
* @param ref the reference context
|
||||
* @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window)
|
||||
* @return a pair where the first value is the mismatches and the second is the total bases within the context
|
||||
* @return the number of mismatches
|
||||
*/
|
||||
public static Pair<Integer, Integer> mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) {
|
||||
public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) {
|
||||
|
||||
int mismatches = 0;
|
||||
int totalBases = 0;
|
||||
|
||||
GenomeLoc window = ref.getWindow();
|
||||
char[] refBases = ref.getBases();
|
||||
|
|
@ -212,7 +208,6 @@ public class AlignmentUtils {
|
|||
if ( ignoreTargetSite && ref.getLocus().getStart() == currentPos )
|
||||
continue;
|
||||
|
||||
totalBases++;
|
||||
char readChr = (char)readBases[readIndex];
|
||||
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||
mismatches++;
|
||||
|
|
@ -234,8 +229,7 @@ public class AlignmentUtils {
|
|||
|
||||
}
|
||||
|
||||
//System.out.println("There are " + mismatches + " mismatches out of " + totalBases + " bases for read " + p.getRead().getReadName());
|
||||
return new Pair<Integer, Integer>(mismatches, totalBases);
|
||||
return mismatches;
|
||||
}
|
||||
|
||||
/** Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment.
|
||||
|
|
|
|||
|
|
@ -266,7 +266,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
|
|||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Read Depth (without MQ0 reads)"));
|
||||
result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
//result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality"));
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest {
|
|||
+"-B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli "
|
||||
+"-vcf %s -sample variant -L /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_test_intervals.interval_list";
|
||||
|
||||
String md5_for_this_test = "f7e67c353d3113447d1b9c8c39de6ed0";
|
||||
String md5_for_this_test = "4bd8a28bcbad107b102fc796918d5932";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(test_args,1, Arrays.asList(md5_for_this_test));
|
||||
executeTest("Testing on E2 annotated but not Q2 annotated file ",spec);
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("1225d977f3e83d362141c1bdb4730016"));
|
||||
Arrays.asList("d9f623b69e20b9c6289562f123b867eb"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("7234adba82893ebf63338ab20ecc610d"));
|
||||
Arrays.asList("d0e70ee36ed1a59b5f086bc30c9b2673"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("05d7f1c30130ce13d355200be8c2b31d"));
|
||||
Arrays.asList("23928a3f79fd5d841505cdeaf342c8dc"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("643fa948531ebbfa013e9eeced76430b"));
|
||||
Arrays.asList("2b47c7a6a7f0ce15fd1d1dd02ecab73b"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -20,9 +20,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
return baseTestString() + " --variant_output_format GELI -confidence 50";
|
||||
}
|
||||
|
||||
private static String OneMb1StateMD5 = "7e3fc1d8427329eb2a3e05a81011749a";
|
||||
private static String OneMb3StateMD5 = "f5912d5d6585436a77495688f09cf1cc";
|
||||
private static String OneMbEmpiricalMD5 = "b9b2d9c7eb9a7af416fddd3b77a72efe";
|
||||
private static String OneMb1StateMD5 = "c664bd887b89e9ebc0b4b569ad8eb128";
|
||||
private static String OneMb3StateMD5 = "9c68f6e900d081023ea97ec467a95bd8";
|
||||
private static String OneMbEmpiricalMD5 = "0b891eefeb2a2bfe707a8a0838b6d049";
|
||||
|
||||
// private static String oneMbMD5(BaseMismatchModel m) {
|
||||
// switch (m) {
|
||||
|
|
@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("ad00b255c83fe43213758b280646e4b4"));
|
||||
Arrays.asList("b19f85fddd08485c668e7192df79d944"));
|
||||
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2PointEM() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("e709714e288c508f79c1ffc5cdbe9cca"));
|
||||
Arrays.asList("a6c8e77d1741f3f6d958a0634fa59e14"));
|
||||
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
|
||||
}
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testPooled1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
|
||||
Arrays.asList("2214983d4ae3a19bc16e7cb89fc3128f"));
|
||||
Arrays.asList("459b8f6a6265b67718495e9bffe96fa8"));
|
||||
executeTest("testPooled1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("9e98eba56add9989e2d40834efc8c803"));
|
||||
Arrays.asList("c132c93cec5fdf02c3235180a7aa7dcc"));
|
||||
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("fe1077636feb41c69f7c4f64147e2bba"));
|
||||
Arrays.asList("09d48c56eae25fb6418e10feeb5b3fc5"));
|
||||
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2Joint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
|
||||
Arrays.asList("6a739a0acb620aeed5d8e0de7b59877a"));
|
||||
Arrays.asList("3544b1eb97834e2050239c101eaddb2d"));
|
||||
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -105,7 +105,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testGenotypeModeJoint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
|
||||
Arrays.asList("85fbcdd816ecc1d9686df244a6e039dd"));
|
||||
Arrays.asList("7e8422f8008a4fe24ea1f5c2913b5d31"));
|
||||
executeTest("testGenotypeMode - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -113,7 +113,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testAllBasesModeJoint() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1,
|
||||
Arrays.asList("f871576cf49e7a83a0f6b5580b0178c2"));
|
||||
Arrays.asList("aedd59f9f2cae7fb07a53812b680852b"));
|
||||
executeTest("testAllBasesMode - Joint Estimate", spec);
|
||||
}
|
||||
|
||||
|
|
@ -141,7 +141,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -bm empirical" +
|
||||
" -vf GELI",
|
||||
1,
|
||||
Arrays.asList("46acd5a5d7301129ee3c0c9a4ab10259"));
|
||||
Arrays.asList("eaca4b2323714dbd7c3ed379ce1843ba"));
|
||||
|
||||
executeTest(String.format("testMultiTechnologies"), spec);
|
||||
}
|
||||
|
|
@ -178,7 +178,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void genotypeTest() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm empirical --genotype", 1,
|
||||
Arrays.asList("db34a348abd13ee894780296e8a7e909"));
|
||||
Arrays.asList("f9bdd9a8864467dbc4e5356bb8801a33"));
|
||||
executeTest("genotypeTest", spec);
|
||||
}
|
||||
|
||||
|
|
@ -237,8 +237,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testLOD() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 100.0, "94c5b48c0c956fcdacbffaa38a80d926" );
|
||||
e.put( 30.0, "df455abc1b2bc533aa1dc6eb088a835a" );
|
||||
e.put( 100.0, "6eec841b28fae433015b3d85608e03f7" );
|
||||
e.put( 30.0, "1b3365f41bbf6867516699afe9efc5f8" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -256,9 +256,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "b8837be7e8beb3ab2ed7150cdc022c65" );
|
||||
e.put( 0.0001, "ef0f2af7d13f166829d86b15fabc2b81" );
|
||||
e.put( 1.0 / 1850, "a435c8c966c11f4393a25a9d01c4fc3d" );
|
||||
e.put( 0.01, "601c48fc350083d14534ba5c3093edb9" );
|
||||
e.put( 0.0001, "bd03f7307314e45951d4d3e85fe63d16" );
|
||||
e.put( 1.0 / 1850, "662d479f1cd54480da1d0e66c81259b0" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void empirical1MbTestBinaryGeli() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseTestString() + " -L 1:10,000,000-11,000,000 -bm empirical --variant_output_format GELI_BINARY -confidence 50", 1,
|
||||
Arrays.asList("18f175c7ccaeca57b8d412e9f4ebbe50"));
|
||||
Arrays.asList("b1027cf309c9ab7572528ce986e2c2d4"));
|
||||
executeTest("empirical1MbTestBinaryGeli", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue