diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java index cc51a3dbf..1f060ba7d 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/StratifiedAlignmentContext.java @@ -44,11 +44,10 @@ public class StratifiedAlignmentContext { // Definitions: // COMPLETE = full alignment context - // MQ0FREE = full context without MQ0 reads - // FORWARD = reads on forward strand (*no* MQ0 reads) - // REVERSE = reads on forward strand (*no* MQ0 reads) + // FORWARD = reads on forward strand + // REVERSE = reads on forward strand // - public enum StratifiedContextType { COMPLETE, MQ0FREE, FORWARD, REVERSE } + public enum StratifiedContextType { COMPLETE, FORWARD, REVERSE } private GenomeLoc loc; private AlignmentContext[] contexts = new AlignmentContext[StratifiedContextType.values().length]; @@ -72,16 +71,12 @@ public class StratifiedAlignmentContext { } public void add(SAMRecord read, int offset) { - if ( read.getMappingQuality() > 0 ) { - reads[StratifiedContextType.MQ0FREE.ordinal()].add(read); - offsets[StratifiedContextType.MQ0FREE.ordinal()].add(offset); - if ( read.getReadNegativeStrandFlag() ) { - reads[StratifiedContextType.REVERSE.ordinal()].add(read); - offsets[StratifiedContextType.REVERSE.ordinal()].add(offset); - } else { - reads[StratifiedContextType.FORWARD.ordinal()].add(read); - offsets[StratifiedContextType.FORWARD.ordinal()].add(offset); - } + if ( read.getReadNegativeStrandFlag() ) { + reads[StratifiedContextType.REVERSE.ordinal()].add(read); + offsets[StratifiedContextType.REVERSE.ordinal()].add(offset); + } else { + reads[StratifiedContextType.FORWARD.ordinal()].add(read); + offsets[StratifiedContextType.FORWARD.ordinal()].add(offset); } reads[StratifiedContextType.COMPLETE.ordinal()].add(read); offsets[StratifiedContextType.COMPLETE.ordinal()].add(offset); @@ -90,18 +85,17 @@ public class StratifiedAlignmentContext { /** * Splits the given AlignmentContext into a StratifiedAlignmentContext per sample. * - * @param context the original AlignmentContext + * @param pileup the original pileup * @param assumedSingleSample if not null, any read without a readgroup will be given this sample name * @param collapseToThisSample if not null, all reads will be assigned this read group regardless of their actual read group * * @return a Map of sample name to StratifiedAlignmentContext * **/ - public static Map splitContextBySample(AlignmentContext context, String assumedSingleSample, String collapseToThisSample) { + public static Map splitContextBySample(ReadBackedPileup pileup, String assumedSingleSample, String collapseToThisSample) { HashMap contexts = new HashMap(); - ReadBackedPileup pileup = context.getPileup(); for (PileupElement p : pileup ) { // get the read @@ -125,7 +119,7 @@ public class StratifiedAlignmentContext { // create a new context object if this is the first time we're seeing a read for this sample StratifiedAlignmentContext myContext = contexts.get(sample); if ( myContext == null ) { - myContext = new StratifiedAlignmentContext(context.getLocation()); + myContext = new StratifiedAlignmentContext(pileup.getLocation()); contexts.put(sample, myContext); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index b9c4feba0..25e38e078 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -39,7 +39,7 @@ public class AlleleBalance extends StandardVariantAnnotation { if ( genotypeStr.length() != 2 ) return null; - final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup().getBases()).toUpperCase(); + final String bases = new String(context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup().getBases()).toUpperCase(); if ( bases.length() == 0 ) return null; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index 35c696b2b..7868c01ef 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -20,5 +20,5 @@ public class DepthOfCoverage extends StandardVariantAnnotation { public String getKeyName() { return VCFRecord.DEPTH_KEY; } - public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total Depth (including MQ0 reads)"); } + public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine(getKeyName(), 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total Depth"); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MismatchRate.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MismatchRate.java deleted file mode 100755 index f6e99e3a8..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MismatchRate.java +++ /dev/null @@ -1,35 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.annotator; - -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; -import org.broadinstitute.sting.utils.pileup.*; -import org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.*; - -import java.util.Map; - - -public class MismatchRate implements VariantAnnotation { - - public String annotate(ReferenceContext ref, Map stratifiedContexts, Variation variation) { - int mismatches = 0; - int totalBases = 0; - for ( String sample : stratifiedContexts.keySet() ) { - ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(); - Pair counts = AlignmentUtils.mismatchesInRefWindow(pileup, ref, true); - mismatches += counts.first; - totalBases += counts.second; - } - - // sanity check - if ( totalBases == 0 ) - return null; - - return String.format("%.2f", (double)mismatches/(double)totalBases); - } - - public String getKeyName() { return "MR"; } - - public VCFInfoHeaderLine getDescription() { return new VCFInfoHeaderLine("MR", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Mismatch Rate of Reads Spanning This Position"); } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index d9894e7af..b990573df 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -36,7 +36,7 @@ public class RankSumTest implements VariantAnnotation { if ( context == null ) continue; - fillQualsFromPileup(ref.getBase(), variation.getAlternativeBaseForSNP(), context.getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(), refQuals, altQuals); + fillQualsFromPileup(ref.getBase(), variation.getAlternativeBaseForSNP(), context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(), refQuals, altQuals); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java index 4cf84fce9..b81c0d6f3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkew.java @@ -37,7 +37,7 @@ public class SecondBaseSkew implements VariantAnnotation { Pair depth = new Pair(0, 0); for ( String sample : stratifiedContexts.keySet() ) { - Pair sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(), snp); + Pair sampleDepth = getSecondaryPileupNonrefCount(ref.getBase(), stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(), snp); depth.first += sampleDepth.first; depth.second += sampleDepth.second; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index d9a91b637..5ba5a95e8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -15,7 +15,7 @@ public class SpanningDeletions extends StandardVariantAnnotation { int deletions = 0; int depth = 0; for ( String sample : stratifiedContexts.keySet() ) { - ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(); + ReadBackedPileup pileup = stratifiedContexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); deletions += pileup.getNumberOfDeletions(); depth += pileup.size(); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 7a741bc6e..198ed84a3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -165,7 +165,7 @@ public class VariantAnnotator extends RodWalker { if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 && variant.isBiallelic() && variant.isSNP() ) { - Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context, null, null); + Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getPileup(), null, null); if ( stratifiedContexts != null ) annotations = getAnnotations(ref, stratifiedContexts, variant, requestedAnnotations); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 650d77049..1a1b205e1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -98,7 +98,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul if ( call instanceof ReadBacked ) { - ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(); + ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); ((ReadBacked)call).setPileup(pileup); } if ( call instanceof SampleBacked ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index 3b4bca690..15f6f640d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -22,7 +22,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode public Pair> calculateGenotype(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map contexts, DiploidGenotypePriors priors) { // run the EM calculation - EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE); + EMOutput overall = runEM(ref, contexts, priors, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); double PofD = Math.pow(10, overall.getPofD()); double PofNull = Math.pow(10, overall.getPofNull()); @@ -93,7 +93,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode for ( String sample : GLs.keySet() ) { // create the call - AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE); + AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); GenotypeCall call = GenotypeWriterFactory.createSupportedGenotypeCall(OUTPUT_FORMAT, ref, context.getLocation()); // set the genotype and confidence @@ -105,7 +105,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode call.setGenotype(bestGenotype); if ( call instanceof ReadBacked ) { - ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE).getPileup(); + ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); ((ReadBacked)call).setPileup(pileup); } if ( call instanceof SampleBacked ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index dd44cbff0..a17ac618c 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -55,8 +55,8 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc initializeAlleleFrequencies(frequencyEstimationPoints); - initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE); - calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE); + initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); + calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); calculatePofFs(ref, frequencyEstimationPoints); // print out stats if we have a writer @@ -74,7 +74,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc int[] qualCounts = new int[4]; for ( String sample : contexts.keySet() ) { - AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.MQ0FREE); + AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); // calculate the sum of quality scores for each base ReadBackedPileup pileup = context.getPileup(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 39b486bf7..fc19e16a5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -40,7 +40,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation return null; // get the genotype likelihoods - Pair discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.MQ0FREE); + Pair discoveryGL = getSingleSampleLikelihoods(sampleContext, priors, StratifiedAlignmentContext.StratifiedContextType.COMPLETE); // find the index of the best genotype double[] normPosteriors = discoveryGL.second.getNormalizedPosteriors(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index f4a7001d3..8a370cf21 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -88,9 +88,15 @@ public class UnifiedArgumentCollection { @Argument(fullName = "min_allele_frequency", shortName = "min_freq", doc = "The minimum possible allele frequency in a population (advanced)", required = false) public double MINIMUM_ALLELE_FREQUENCY = 1e-8; - @Argument(fullName = "minBaseQualityScore", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false) - public Integer MIN_BASE_QUALTY_SCORE = -1; + @Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false) + public int MIN_BASE_QUALTY_SCORE = 20; - @Argument(fullName = "minMappingQualityScore", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false) - public Integer MIN_MAPPING_QUALTY_SCORE = -1; + @Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling (MQ0 reads are ignored regardless)", required = false) + public int MIN_MAPPING_QUALTY_SCORE = 30; + + @Argument(fullName = "max_mismatches_in_40bp_window", shortName = "mm40", doc = "Maximum number of mismatches within a 40 bp window (20bp on either side) around the target position for a read to be used for calling", required = false) + public int MAX_MISMATCHES = 3; + + @Argument(fullName = "use_reads_with_bad_mates", shortName = "bad_mates", doc = "Use reads whose mates are mapped excessively far away for calling", required = false) + public boolean USE_BADLY_MATED_READS = false; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index da3bae24b..88ed9cf74 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -26,19 +26,16 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator; -import org.broadinstitute.sting.gatk.walkers.Reference; -import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.cmdLine.*; import org.broadinstitute.sting.utils.genotype.*; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine; -import org.broadinstitute.sting.utils.genotype.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.genotype.vcf.*; import net.sf.samtools.SAMReadGroupRecord; @@ -51,6 +48,7 @@ import java.util.*; * multi-sample, and pooled data. The user can choose from several different incorporated calculation models. */ @Reference(window=@Window(start=-20,stop=20)) +@ReadFilters({ZeroMappingQualityReadFilter.class}) public class UnifiedGenotyper extends LocusWalker>, Integer> { @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @@ -174,6 +172,7 @@ public class UnifiedGenotyper extends LocusWalker 0 && rawContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) ) + if ( pileup.size() == 0 || + (UAC.MAX_READS_IN_PILEUP > 0 && pileup.size() > UAC.MAX_READS_IN_PILEUP) ) return null; // are there too many deletions in the pileup? - ReadBackedPileup pileup = rawContext.getPileup().getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE); - AlignmentContext context = new AlignmentContext(rawContext.getLocation(), pileup); if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) && - (double)pileup.getPileupWithoutMappingQualityZeroReads().getNumberOfDeletions() / (double)(pileup.size() - pileup.getNumberOfMappingQualityZeroReads()) > UAC.MAX_DELETION_FRACTION ) + (double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION ) return null; // stratify the AlignmentContext and cut by sample // Note that for testing purposes, we may want to throw multi-samples at pooled mode - Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null)); + Map stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null)); if ( stratifiedContexts == null ) return null; DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); - Pair> call = gcm.calculateGenotype(tracker, ref, context.getLocation(), stratifiedContexts, priors); + Pair> call = gcm.calculateGenotype(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors); // annotate the call, if possible if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) { @@ -237,6 +240,18 @@ public class UnifiedGenotyper extends LocusWalker filteredPileup = new ArrayList(); + for ( PileupElement p : pileup ) { + if ( (useBadMates || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) && + AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= maxMismatches ) + filteredPileup.add(p); + } + return new ReadBackedPileup(pileup.getLocation(), filteredPileup); + + } + private static boolean isValidDeletionFraction(double d) { return ( d >= 0.0 && d <= 1.0 ); } diff --git a/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java b/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java index 287afc5d7..c9c4e9ffd 100644 --- a/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java +++ b/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java @@ -118,7 +118,7 @@ public class AlignmentUtils { * @param refSeq chunk of reference sequence that subsumes the alignment completely (if alignment runs out of * the reference string, IndexOutOfBound exception will be thrown at runtime). * @param refIndex zero-based position, at which the alignment starts on the specified reference string. - * @return + * @return the number of mismatches */ public static int numMismatches(SAMRecord r, String refSeq, int refIndex) { int readIdx = 0; @@ -160,15 +160,12 @@ public class AlignmentUtils { * @param pileup the pileup with reads * @param ref the reference context * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) - * @return a pair where the first value is the mismatches and the second is the total bases within the context + * @return the number of mismatches */ - public static Pair mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) { - Pair mismatches = new Pair(0, 0); - for ( PileupElement p : pileup ) { - Pair mm = mismatchesInRefWindow(p, ref, ignoreTargetSite); - mismatches.first += mm.first; - mismatches.second += mm.second; - } + public static int mismatchesInRefWindow(ReadBackedPileup pileup, ReferenceContext ref, boolean ignoreTargetSite) { + int mismatches = 0; + for ( PileupElement p : pileup ) + mismatches += mismatchesInRefWindow(p, ref, ignoreTargetSite); return mismatches; } @@ -177,12 +174,11 @@ public class AlignmentUtils { * @param p the pileup element * @param ref the reference context * @param ignoreTargetSite if true, ignore mismatches at the target locus (i.e. the center of the window) - * @return a pair where the first value is the mismatches and the second is the total bases within the context + * @return the number of mismatches */ - public static Pair mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) { + public static int mismatchesInRefWindow(PileupElement p, ReferenceContext ref, boolean ignoreTargetSite) { int mismatches = 0; - int totalBases = 0; GenomeLoc window = ref.getWindow(); char[] refBases = ref.getBases(); @@ -212,7 +208,6 @@ public class AlignmentUtils { if ( ignoreTargetSite && ref.getLocus().getStart() == currentPos ) continue; - totalBases++; char readChr = (char)readBases[readIndex]; if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) ) mismatches++; @@ -234,8 +229,7 @@ public class AlignmentUtils { } - //System.out.println("There are " + mismatches + " mismatches out of " + totalBases + " bases for read " + p.getRead().getReadName()); - return new Pair(mismatches, totalBases); + return mismatches; } /** Returns number of alignment blocks (continuous stretches of aligned bases) in the specified alignment. diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 447f52951..19b00a8db 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -266,7 +266,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { Set result = new HashSet(); result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.String, "Genotype")); result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Genotype Quality")); - result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Read Depth (without MQ0 reads)")); + result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Read Depth (only filtered reads used for calling)")); //result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality")); return result; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java index accfd5d4d..38731d918 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java @@ -33,7 +33,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest { +"-B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli " +"-vcf %s -sample variant -L /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_test_intervals.interval_list"; - String md5_for_this_test = "f7e67c353d3113447d1b9c8c39de6ed0"; + String md5_for_this_test = "4bd8a28bcbad107b102fc796918d5932"; WalkerTestSpec spec = new WalkerTestSpec(test_args,1, Arrays.asList(md5_for_this_test)); executeTest("Testing on E2 annotated but not Q2 annotated file ",spec); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 90284a1c9..ecb342d99 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("1225d977f3e83d362141c1bdb4730016")); + Arrays.asList("d9f623b69e20b9c6289562f123b867eb")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("7234adba82893ebf63338ab20ecc610d")); + Arrays.asList("d0e70ee36ed1a59b5f086bc30c9b2673")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("05d7f1c30130ce13d355200be8c2b31d")); + Arrays.asList("23928a3f79fd5d841505cdeaf342c8dc")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("643fa948531ebbfa013e9eeced76430b")); + Arrays.asList("2b47c7a6a7f0ce15fd1d1dd02ecab73b")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 4672c88da..1ff579642 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -20,9 +20,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { return baseTestString() + " --variant_output_format GELI -confidence 50"; } - private static String OneMb1StateMD5 = "7e3fc1d8427329eb2a3e05a81011749a"; - private static String OneMb3StateMD5 = "f5912d5d6585436a77495688f09cf1cc"; - private static String OneMbEmpiricalMD5 = "b9b2d9c7eb9a7af416fddd3b77a72efe"; + private static String OneMb1StateMD5 = "c664bd887b89e9ebc0b4b569ad8eb128"; + private static String OneMb3StateMD5 = "9c68f6e900d081023ea97ec467a95bd8"; + private static String OneMbEmpiricalMD5 = "0b891eefeb2a2bfe707a8a0838b6d049"; // private static String oneMbMD5(BaseMismatchModel m) { // switch (m) { @@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("ad00b255c83fe43213758b280646e4b4")); + Arrays.asList("b19f85fddd08485c668e7192df79d944")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("e709714e288c508f79c1ffc5cdbe9cca")); + Arrays.asList("a6c8e77d1741f3f6d958a0634fa59e14")); executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } @@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("2214983d4ae3a19bc16e7cb89fc3128f")); + Arrays.asList("459b8f6a6265b67718495e9bffe96fa8")); executeTest("testPooled1", spec); } @@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("9e98eba56add9989e2d40834efc8c803")); + Arrays.asList("c132c93cec5fdf02c3235180a7aa7dcc")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("fe1077636feb41c69f7c4f64147e2bba")); + Arrays.asList("09d48c56eae25fb6418e10feeb5b3fc5")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("6a739a0acb620aeed5d8e0de7b59877a")); + Arrays.asList("3544b1eb97834e2050239c101eaddb2d")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -105,7 +105,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testGenotypeModeJoint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -genotype -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1, - Arrays.asList("85fbcdd816ecc1d9686df244a6e039dd")); + Arrays.asList("7e8422f8008a4fe24ea1f5c2913b5d31")); executeTest("testGenotypeMode - Joint Estimate", spec); } @@ -113,7 +113,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testAllBasesModeJoint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -all_bases -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm JOINT_ESTIMATE -confidence 70", 1, - Arrays.asList("f871576cf49e7a83a0f6b5580b0178c2")); + Arrays.asList("aedd59f9f2cae7fb07a53812b680852b")); executeTest("testAllBasesMode - Joint Estimate", spec); } @@ -141,7 +141,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -bm empirical" + " -vf GELI", 1, - Arrays.asList("46acd5a5d7301129ee3c0c9a4ab10259")); + Arrays.asList("eaca4b2323714dbd7c3ed379ce1843ba")); executeTest(String.format("testMultiTechnologies"), spec); } @@ -178,7 +178,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void genotypeTest() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm empirical --genotype", 1, - Arrays.asList("db34a348abd13ee894780296e8a7e909")); + Arrays.asList("f9bdd9a8864467dbc4e5356bb8801a33")); executeTest("genotypeTest", spec); } @@ -237,8 +237,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testLOD() { HashMap e = new HashMap(); - e.put( 100.0, "94c5b48c0c956fcdacbffaa38a80d926" ); - e.put( 30.0, "df455abc1b2bc533aa1dc6eb088a835a" ); + e.put( 100.0, "6eec841b28fae433015b3d85608e03f7" ); + e.put( 30.0, "1b3365f41bbf6867516699afe9efc5f8" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -256,9 +256,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "b8837be7e8beb3ab2ed7150cdc022c65" ); - e.put( 0.0001, "ef0f2af7d13f166829d86b15fabc2b81" ); - e.put( 1.0 / 1850, "a435c8c966c11f4393a25a9d01c4fc3d" ); + e.put( 0.01, "601c48fc350083d14534ba5c3093edb9" ); + e.put( 0.0001, "bd03f7307314e45951d4d3e85fe63d16" ); + e.put( 1.0 / 1850, "662d479f1cd54480da1d0e66c81259b0" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void empirical1MbTestBinaryGeli() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseTestString() + " -L 1:10,000,000-11,000,000 -bm empirical --variant_output_format GELI_BINARY -confidence 50", 1, - Arrays.asList("18f175c7ccaeca57b8d412e9f4ebbe50")); + Arrays.asList("b1027cf309c9ab7572528ce986e2c2d4")); executeTest("empirical1MbTestBinaryGeli", spec); } }