Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Matt Hanna 2011-09-23 14:53:28 -04:00
commit e388c357ca
42 changed files with 889 additions and 685 deletions

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
* @author Your Name * @author Your Name
* @since Date created * @since Date created
*/ */
@Hidden
public class GATKDocsExample extends RodWalker<Integer, Integer> { public class GATKDocsExample extends RodWalker<Integer, Integer> {
/** /**
* Put detailed documentation about the argument here. No need to duplicate the summary information * Put detailed documentation about the argument here. No need to duplicate the summary information

View File

@ -43,6 +43,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* The allele balance (fraction of ref bases over ref + alt bases) across all bialleleic het-called samples
*/
public class AlleleBalance extends InfoFieldAnnotation { public class AlleleBalance extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/**
* The allele balance (fraction of ref bases over ref + alt bases) separately for each bialleleic het-called sample
*/
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {

View File

@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.Map; import java.util.Map;
/**
* Abstract base class for all annotations that are normalized by depth
*/
public abstract class AnnotationByDepth extends InfoFieldAnnotation { public abstract class AnnotationByDepth extends InfoFieldAnnotation {

View File

@ -47,6 +47,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Count of A, C, G, T bases across all samples
*/
public class BaseCounts extends InfoFieldAnnotation { public class BaseCounts extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -13,6 +13,9 @@ import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele)
*/
public class BaseQualityRankSumTest extends RankSumTest { public class BaseQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); } public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }

View File

@ -44,6 +44,11 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Allele count in genotypes, for each ALT allele, in the same order as listed;
* allele Frequency, for each ALT allele, in the same order as listed; total number
* of alleles in called genotypes.
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation { public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };

View File

@ -16,7 +16,12 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Total (unfiltered) depth over all samples.
*
* Affected by downsampling (-dcov) though, so the max value one can obtain for N samples with -dcov D
* is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -23,6 +23,25 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* The depth of coverage of each VCF allele in this sample
*
* Complementary fields that two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
* because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base
* assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what
* determine the genotype calls (see below).
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static String REF_ALLELE = "REF"; private static String REF_ALLELE = "REF";

View File

@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/**
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
* being seen on only the forward or only the reverse strand) in the reads? More bias is
* indicative of false positive calls.
*/
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation { public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
private static final String FS = "FS"; private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320; private static final double MIN_PVALUE = 1E-320;

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
*/
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/**
* Consistency of the site with two (and only two) segregating haplotypes. Higher scores
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
*/
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
private final static boolean DEBUG = false; private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10; private final static int MIN_CONTEXT_WING_SIZE = 10;

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium
*/
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
private static final int MIN_SAMPLES = 10; private static final int MIN_SAMPLES = 10;

View File

@ -16,7 +16,9 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Largest contiguous homopolymer run of the variant allele in either direction on the reference.
*/
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation { public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
private boolean ANNOTATE_INDELS = true; private boolean ANNOTATE_INDELS = true;

View File

@ -17,14 +17,15 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 5/16/11
*/
// A set of annotations calculated directly from the GLs /**
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation { * Likelihood-based (using PL field) test for the inbreeding among samples.
*
* A continuous generalization of the Hardy-Weinberg test for disequilibrium that works
* well with limited coverage per sample. See the 1000 Genomes Phase I release for
* more information.
*/
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
private static final int MIN_SAMPLES = 10; private static final int MIN_SAMPLES = 10;

View File

@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/** /**
* Created by IntelliJ IDEA. * Rough category of indel type (insertion, deletion, multi-allelic, other)
* User: delangel
* Date: Mar 11, 2011
* Time: 11:47:33 AM
* To change this template use File | Settings | File Templates.
*/ */
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Triplet annotation: fraction of MAQP == 0, MAPQ < 10, and count of all mapped reads
*/
public class LowMQ extends InfoFieldAnnotation { public class LowMQ extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -14,6 +14,9 @@ import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
*/
public class MappingQualityRankSumTest extends RankSumTest { public class MappingQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); } public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Total count across all samples of mapping quality zero reads
*/
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,85 +1,81 @@
/* /*
* Copyright (c) 2010 The Broad Institute * Copyright (c) 2010 The Broad Institute
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without * files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, * restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell * copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the * copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following * Software is furnished to do so, subject to the following
* conditions: * conditions:
* *
* The above copyright notice and this permission notice shall be * The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software. * included in all copies or substantial portions of the Software.
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.gatk.walkers.annotator; package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * Count for each sample of mapping quality zero reads
* User: asivache */
* Date: Feb 4, 2011 public class MappingQualityZeroBySample extends GenotypeAnnotation {
* Time: 6:46:25 PM public Map<String, Object> annotate(RefMetaDataTracker tracker,
* To change this template use File | Settings | File Templates. AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
*/ if ( g == null || !g.isCalled() )
public class MappingQualityZeroBySample extends GenotypeAnnotation { return null;
public Map<String, Object> annotate(RefMetaDataTracker tracker,
AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) { int mq0 = 0;
if ( g == null || !g.isCalled() ) ReadBackedPileup pileup = null;
return null; if (vc.isIndel() && context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
int mq0 = 0; else if (context.hasBasePileup())
ReadBackedPileup pileup = null; pileup = context.getBasePileup();
if (vc.isIndel() && context.hasExtendedEventPileup()) else return null;
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup()) if (pileup != null) {
pileup = context.getBasePileup(); for (PileupElement p : pileup ) {
else return null; if ( p.getMappingQual() == 0 )
mq0++;
if (pileup != null) { }
for (PileupElement p : pileup ) { }
if ( p.getMappingQual() == 0 ) Map<String, Object> map = new HashMap<String, Object>();
mq0++; map.put(getKeyNames().get(0), String.format("%d", mq0));
} return map;
} }
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%d", mq0)); public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
return map;
} public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(
new VCFFormatHeaderLine(getKeyNames().get(0), 1,
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(
new VCFFormatHeaderLine(getKeyNames().get(0), 1, }
VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); }
}

View File

@ -17,8 +17,9 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Fraction of all reads across samples that have mapping quality zero
*/
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -17,11 +17,8 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * The number of N bases, counting only SOLiD data
* User: rpoplin
* Date: 5/16/11
*/ */
public class NBaseCount extends InfoFieldAnnotation { public class NBaseCount extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if( stratifiedContexts.size() == 0 ) if( stratifiedContexts.size() == 0 )

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator; package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -15,7 +16,11 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth.
*
* Low scores are indicative of false positive calls and artifacts.
*/
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,6 +21,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Root Mean Square of the mapping quality of the reads across all samples.
*/
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation { public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,7 +21,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Abstract root for all RankSum based annotations
*/
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation { public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final double INDEL_LIKELIHOOD_THRESH = 0.1;
static final boolean DEBUG = false; static final boolean DEBUG = false;

View File

@ -1,209 +1,207 @@
/* /*
* Copyright (c) 2010 The Broad Institute * Copyright (c) 2010 The Broad Institute
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without * files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, * restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell * copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the * copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following * Software is furnished to do so, subject to the following
* conditions: * conditions:
* *
* The above copyright notice and this permission notice shall be * The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software. * included in all copies or substantial portions of the Software.
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.gatk.walkers.annotator; package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap; import java.util.Arrays;
import java.util.List; import java.util.HashMap;
import java.util.Map; import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA. /**
* User: asivache * Unsupported
* Date: Feb 4, 2011 */
* Time: 3:59:27 PM @Hidden
* To change this template use File | Settings | File Templates. public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
*/
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { private static String REF_ALLELE = "REF";
private static String REF_ALLELE = "REF"; private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref,
AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, if ( g == null || !g.isCalled() )
AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { return null;
if ( g == null || !g.isCalled() )
return null; if ( vc.isSNP() )
return annotateSNP(stratifiedContext, vc);
if ( vc.isSNP() ) if ( vc.isIndel() )
return annotateSNP(stratifiedContext, vc); return annotateIndel(stratifiedContext, vc);
if ( vc.isIndel() )
return annotateIndel(stratifiedContext, vc); return null;
}
return null;
} private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {
private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) { if ( ! stratifiedContext.hasBasePileup() ) return null;
if ( ! stratifiedContext.hasBasePileup() ) return null; HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlternateAlleles() )
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>(); alleleCounts.put(allele.getBases()[0], 0);
for ( Allele allele : vc.getAlternateAlleles() )
alleleCounts.put(allele.getBases()[0], 0); ReadBackedPileup pileup = stratifiedContext.getBasePileup();
int totalDepth = pileup.size();
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
int totalDepth = pileup.size(); Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!!
if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!! int mq0 = 0; // number of "ref" reads that are acually mq0
for ( PileupElement p : pileup ) {
int mq0 = 0; // number of "ref" reads that are acually mq0 if ( p.getMappingQual() == 0 ) {
for ( PileupElement p : pileup ) { mq0++;
if ( p.getMappingQual() == 0 ) { continue;
mq0++; }
continue; if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt
} alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt }
alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
} if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do
if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do // we need to add counts in the correct order
String[] fracs = new String[alleleCounts.size()];
// we need to add counts in the correct order for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
String[] fracs = new String[alleleCounts.size()]; fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0));
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) { }
fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0));
} map.put(getKeyNames().get(1), fracs);
return map;
map.put(getKeyNames().get(1), fracs); }
return map;
} private Map<String,Object> annotateIndel(AlignmentContext
stratifiedContext, VariantContext
private Map<String,Object> annotateIndel(AlignmentContext vc) {
stratifiedContext, VariantContext
vc) { if ( ! stratifiedContext.hasExtendedEventPileup() ) {
return null;
if ( ! stratifiedContext.hasExtendedEventPileup() ) { }
return null;
} ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
if ( pileup == null )
ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup(); return null;
if ( pileup == null ) int totalDepth = pileup.size();
return null;
int totalDepth = pileup.size(); Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away if ( totalDepth == 0 ) return map;
int mq0 = 0; // number of "ref" reads that are acually mq0
if ( totalDepth == 0 ) return map;
int mq0 = 0; // number of "ref" reads that are acually mq0 HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
Allele refAllele = vc.getReference();
HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
Allele refAllele = vc.getReference(); for ( Allele allele : vc.getAlternateAlleles() ) {
for ( Allele allele : vc.getAlternateAlleles() ) { if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
if ( allele.isNoCall() ) { }
continue; // this does not look so good, should we die???
} alleleCounts.put(getAlleleRepresentation(allele), 0);
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
} for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) { if ( e.getMappingQual() == 0 ) {
mq0++;
if ( e.getMappingQual() == 0 ) { continue;
mq0++; }
continue;
} if ( e.isInsertion() ) {
if ( e.isInsertion() ) { final String b = e.getEventBases();
if ( alleleCounts.containsKey(b) ) {
final String b = e.getEventBases(); alleleCounts.put(b, alleleCounts.get(b)+1);
if ( alleleCounts.containsKey(b) ) { }
alleleCounts.put(b, alleleCounts.get(b)+1);
} } else {
if ( e.isDeletion() ) {
} else { if ( e.getEventLength() == refAllele.length() ) {
if ( e.isDeletion() ) { // this is indeed the deletion allele recorded in VC
if ( e.getEventLength() == refAllele.length() ) { final String b = DEL;
// this is indeed the deletion allele recorded in VC if ( alleleCounts.containsKey(b) ) {
final String b = DEL; alleleCounts.put(b, alleleCounts.get(b)+1);
if ( alleleCounts.containsKey(b) ) { }
alleleCounts.put(b, alleleCounts.get(b)+1); }
} // else {
} // System.out.print(" deletion of WRONG length found");
// else { // }
// System.out.print(" deletion of WRONG length found"); }
// } }
} }
}
} if ( mq0 == totalDepth ) return map;
if ( mq0 == totalDepth ) return map; String[] fracs = new String[alleleCounts.size()];
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
String[] fracs = new String[alleleCounts.size()]; fracs[i] = String.format("%.3f",
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) ((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0));
fracs[i] = String.format("%.3f",
((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0)); map.put(getKeyNames().get(1), fracs);
map.put(getKeyNames().get(1), fracs); //map.put(getKeyNames().get(0), counts);
return map;
//map.put(getKeyNames().get(0), counts); }
return map;
} private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
private String getAlleleRepresentation(Allele allele) { return DEL;
if ( allele.isNull() ) { // deletion wrt the ref } else { // insertion, pass actual bases
return DEL; return allele.getBaseString();
} else { // insertion, pass actual bases }
return allele.getBaseString();
} }
} // public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList("DP","FA"); }
// public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList("DP","FA"); } public List<VCFFormatHeaderLine> getDescriptions() {
return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0),
public List<VCFFormatHeaderLine> getDescriptions() { 1,
return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0), VCFHeaderLineType.Integer,
1, "Total read depth per sample, including MQ0"),
VCFHeaderLineType.Integer, new VCFFormatHeaderLine(getKeyNames().get(1),
"Total read depth per sample, including MQ0"), VCFHeaderLineCount.UNBOUNDED,
new VCFFormatHeaderLine(getKeyNames().get(1), VCFHeaderLineType.Float,
VCFHeaderLineCount.UNBOUNDED, "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
VCFHeaderLineType.Float, }
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample")); }
}
}

View File

@ -19,11 +19,8 @@ import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
/** /**
* Created by IntelliJ IDEA. * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
* User: rpoplin
* Date: 3/30/11
*/ */
public class ReadPosRankSumTest extends RankSumTest { public class ReadPosRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); } public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }

View File

@ -15,8 +15,9 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* SB annotation value by depth of alt containing samples
*/
public class SBByDepth extends AnnotationByDepth { public class SBByDepth extends AnnotationByDepth {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -41,7 +41,9 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* List all of the samples in the info field
*/
public class SampleList extends InfoFieldAnnotation { public class SampleList extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Fraction of reads containing spanning deletions at this site.
*/
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator; package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -19,12 +20,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * Counts of bases from SLX, 454, and SOLiD at this site
* User: delangel
* Date: 6/29/11
* Time: 3:14 PM
* To change this template use File | Settings | File Templates.
*/ */
@Hidden
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation { public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
private String nSLX = "NumSLX"; private String nSLX = "NumSLX";
private String n454 ="Num454"; private String n454 ="Num454";

View File

@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// code for testing purposes // code for testing purposes
// //
private final static boolean DEBUG = false; private final static boolean DEBUG = false;
private final static boolean PRINT_LIKELIHOODS = false;
private final static int N_CYCLES = 1;
private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded");
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
private final static boolean COMPARE_TO_GS = false;
public enum ExactCalculation {
N2_GOLD_STANDARD,
LINEAR_EXPERIMENTAL
}
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
private boolean SIMPLE_GREEDY_GENOTYPER = false;
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
final private ExactCalculation calcToUse;
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter); super(UAC, N, logger, verboseWriter);
calcToUse = UAC.EXACT_CALCULATION_TYPE;
} }
public void getLog10PNonRef(RefMetaDataTracker tracker, public void getLog10PNonRef(RefMetaDataTracker tracker,
@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
Map<String, Genotype> GLs, Set<Allele>alleles, Map<String, Genotype> GLs, Set<Allele>alleles,
double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) { double[] log10AlleleFrequencyPosteriors) {
// todo -- REMOVE ME AFTER TESTING final int numAlleles = alleles.size();
// todo -- REMOVE ME AFTER TESTING final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
// todo -- REMOVE ME AFTER TESTING final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
double[] gsPosteriors;
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
int idxAA = GenotypeType.AA.ordinal();
int idxAB = GenotypeType.AB.ordinal();
int idxBB = GenotypeType.BB.ordinal();
// todo -- remove me after testing
if ( N_CYCLES > 1 ) {
for ( int i = 0; i < N_CYCLES; i++) {
timerGS.restart();
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB);
timerGS.stop();
timerExpt.restart();
linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
timerExpt.stop();
}
System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n",
timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime());
}
int lastK = -1;
int numAlleles = alleles.size();
int idxDiag = numAlleles; int idxDiag = numAlleles;
int incr = numAlleles - 1; int incr = numAlleles - 1;
double[][] posteriorCache = new double[numAlleles-1][];
double[] bestAFguess = new double[numAlleles-1];
for (int k=1; k < numAlleles; k++) { for (int k=1; k < numAlleles; k++) {
// multi-allelic approximation, part 1: Ideally // multi-allelic approximation, part 1: Ideally
// for each alt allele compute marginal (suboptimal) posteriors - // for each alt allele compute marginal (suboptimal) posteriors -
@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC. // So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
idxAA = 0; final int idxAA = 0;
idxAB = k; final int idxAB = k;
// yy is always element on the diagonal. // yy is always element on the diagonal.
// 2 alleles: BBelement 2 // 2 alleles: BBelement 2
// 3 alleles: BB element 3. CC element 5 // 3 alleles: BB element 3. CC element 5
// 4 alleles: // 4 alleles:
idxBB = idxDiag; final int idxBB = idxDiag;
idxDiag += incr--; idxDiag += incr--;
// todo - possible cleanup final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
switch ( calcToUse ) {
case N2_GOLD_STANDARD:
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
case LINEAR_EXPERIMENTAL:
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
}
if (numAlleles > 2) { if (numAlleles > 2) {
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone(); posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors); bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
@ -153,39 +100,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]); log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
} }
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
if ( COMPARE_TO_GS ) {
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB);
double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0;
if ( ! eq || PRINT_LIKELIHOODS ) {
System.out.printf("----------------------------------------%n");
for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
double x = log10AlleleFrequencyPosteriors[k];
System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k],
log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
}
System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar);
}
}
} }
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) { private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
//int j = 0;
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.values() ) { for ( Genotype sample : GLs.values() ) {
if ( sample.hasLikelihoods() ) { if ( sample.hasLikelihoods() ) {
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
double[] gls = sample.getLikelihoods().getAsVector(); double[] gls = sample.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL) if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
@ -240,84 +162,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
} }
// now with banding
public int linearExactBanded(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
throw new NotImplementedException();
// final int numSamples = GLs.size();
// final int numChr = 2*numSamples;
// final double[][] genotypeLikelihoods = getGLs(GLs);
//
// final ExactACCache logY = new ExactACCache(numSamples+1);
// logY.getkMinus0()[0] = 0.0; // the zero case
//
// double maxLog10L = Double.NEGATIVE_INFINITY;
// boolean done = false;
// int lastK = -1;
// final int BAND_SIZE = 10;
//
// for (int k=0; k <= numChr && ! done; k++ ) {
// final double[] kMinus0 = logY.getkMinus0();
// int jStart = Math.max(k - BAND_SIZE, 1);
// int jStop = Math.min(k + BAND_SIZE, numSamples);
//
// if ( k == 0 ) { // special case for k = 0
// for ( int j=1; j <= numSamples; j++ ) {
// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
// }
// } else { // k > 0
// final double[] kMinus1 = logY.getkMinus1();
// final double[] kMinus2 = logY.getkMinus2();
// Arrays.fill(kMinus0,0);
//
// for ( int j = jStart; j <= jStop; j++ ) {
// final double[] gl = genotypeLikelihoods[j];
// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
//
// double aa = Double.NEGATIVE_INFINITY;
// double ab = Double.NEGATIVE_INFINITY;
// if (k < 2*j-1)
// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
//
// if (k < 2*j)
// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
//
// double log10Max;
// if (k > 1) {
// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
// log10Max = approximateLog10SumLog10(aa, ab, bb);
// } else {
// // we know we aren't considering the BB case, so we can use an optimized log10 function
// log10Max = approximateLog10SumLog10(aa, ab);
// }
//
// // finally, update the L(j,k) value
// kMinus0[j] = log10Max - logDenominator;
//
// String offset = Utils.dupString(' ',k);
// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]);
// }
// }
//
// // update the posteriors vector
// final double log10LofK = kMinus0[jStop];
// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
//
// // can we abort early?
// lastK = k;
// maxLog10L = Math.max(maxLog10L, log10LofK);
// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
// done = true;
// }
//
// logY.rotate();
// }
//
// return lastK;
}
public int linearExact(Map<String, Genotype> GLs, public int linearExact(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
@ -605,82 +449,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
return calls; return calls;
} }
// -------------------------------------------------------------------------------------
//
// Gold standard, but O(N^2), implementation.
//
// TODO -- remove me for clarity in this code
//
// -------------------------------------------------------------------------------------
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
int numSamples = GLs.size();
int numChr = 2*numSamples;
double[][] logYMatrix = new double[1+numSamples][1+numChr];
for (int i=0; i <=numSamples; i++)
for (int j=0; j <=numChr; j++)
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
//YMatrix[0][0] = 1.0;
logYMatrix[0][0] = 0.0;
int j=0;
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
j++;
if ( !sample.getValue().hasLikelihoods() )
continue;
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
//double logDenominator = Math.log10(2.0*j*(2.0*j-1));
double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
// special treatment for k=0: iteration reduces to:
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA];
for (int k=1; k <= 2*j; k++ ) {
//double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
double logNumerator[];
logNumerator = new double[3];
if (k < 2*j-1)
logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
genotypeLikelihoods[idxAA];
else
logNumerator[0] = Double.NEGATIVE_INFINITY;
if (k < 2*j)
logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
genotypeLikelihoods[idxAB];
else
logNumerator[1] = Double.NEGATIVE_INFINITY;
if (k > 1)
logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
genotypeLikelihoods[idxBB];
else
logNumerator[2] = Double.NEGATIVE_INFINITY;
double logNum = MathUtils.softMax(logNumerator);
//YMatrix[j][k] = num/den;
logYMatrix[j][k] = logNum - logDenominator;
}
}
for (int k=0; k <= numChr; k++)
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
return numChr;
}
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) { private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
int j = logYMatrix.length - 1; int j = logYMatrix.length - 1;
System.out.printf("-----------------------------------%n"); System.out.printf("-----------------------------------%n");
@ -689,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior); System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
} }
} }
} }

View File

@ -168,10 +168,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false) @Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
public boolean GSA_PRODUCTION_ONLY = false; public boolean GSA_PRODUCTION_ONLY = false;
@Hidden
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
@Hidden @Hidden
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false; public boolean IGNORE_SNP_ALLELES = false;
@ -191,7 +187,6 @@ public class UnifiedArgumentCollection {
uac.GLmodel = GLmodel; uac.GLmodel = GLmodel;
uac.AFmodel = AFmodel; uac.AFmodel = AFmodel;
uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE;
uac.heterozygosity = heterozygosity; uac.heterozygosity = heterozygosity;
uac.PCR_error = PCR_error; uac.PCR_error = PCR_error;
uac.GenotypingMode = GenotypingMode; uac.GenotypingMode = GenotypingMode;

View File

@ -42,6 +42,7 @@ import java.util.*;
* *
* <p>Body test</p> * <p>Body test</p>
*/ */
@Hidden
public class DocumentationTest extends RodWalker<Integer, Integer> { public class DocumentationTest extends RodWalker<Integer, Integer> {
// the docs for the arguments are in the collection // the docs for the arguments are in the collection
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

View File

@ -432,25 +432,37 @@ public class ClippingOp {
} }
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
int shift = 0; int newShift = 0;
int oldShift = 0;
int deletionShift = 0;
// Rewind to previous start (by counting everything that was already clipped in this read)
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (!cigarElement.getOperator().consumesReferenceBases())
shift -= cigarElement.getLength();
else
break;
}
// Advance to new start (by counting everything new that has been clipped )
for (CigarElement cigarElement : newCigar.getCigarElements()) { for (CigarElement cigarElement : newCigar.getCigarElements()) {
if (!cigarElement.getOperator().consumesReferenceBases()) if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
shift += cigarElement.getLength(); newShift += cigarElement.getLength();
else else
break; break;
} }
return shift; for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
else
break;
}
int basesClipped = 0;
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (basesClipped > newShift) // are we beyond the clipped region?
break;
else if (cigarElement.getOperator() == CigarOperator.DELETION) // if this is a deletion, we have to adjust the starting shift
deletionShift += cigarElement.getLength();
else
basesClipped += cigarElement.getLength();
}
return newShift - oldShift + deletionShift;
} }
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
@ -462,8 +474,8 @@ public class ClippingOp {
return -clippedLength; return -clippedLength;
} }
if (cigarElement.getOperator() == CigarOperator.DELETION) // if (cigarElement.getOperator() == CigarOperator.DELETION)
return cigarElement.getLength(); // return cigarElement.getLength();
return 0; return 0;
} }

View File

@ -71,21 +71,21 @@ public class ReadClipper {
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read)) if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
// TODO add check in the Hardclip function if ( start > stop ) {
if ( start > stop ) // stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
}
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into //This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read //an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
stop -= numDeletions(read); // stop -= numDeletions(read);
if ( start > stop ) // if ( start > stop )
start -= numDeletions(read); // start -= numDeletions(read);
//System.out.println("Clipping start/stop: " + start + "/" + stop); //System.out.println("Clipping start/stop: " + start + "/" + stop);
@ -145,7 +145,7 @@ public class ReadClipper {
cutLeft = readIndex + cigarElement.getLength() - 1; cutLeft = readIndex + cigarElement.getLength() - 1;
} }
} }
else else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
rightTail = true; rightTail = true;
if (cigarElement.getOperator().consumesReadBases()) if (cigarElement.getOperator().consumesReadBases())

View File

@ -66,7 +66,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
public boolean includeInDocs(ClassDoc doc) { public boolean includeInDocs(ClassDoc doc) {
try { try {
Class type = HelpUtils.getClassForDoc(doc); Class type = HelpUtils.getClassForDoc(doc);
return JVMUtils.isConcrete(type); boolean hidden = ! getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class);
return ! hidden && JVMUtils.isConcrete(type);
} catch ( ClassNotFoundException e ) { } catch ( ClassNotFoundException e ) {
return false; return false;
} }

View File

@ -74,13 +74,6 @@ public class Genotype {
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased()); return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
} }
public static Genotype removePLs(Genotype g) {
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
}
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) { public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased()); return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
} }

View File

@ -44,6 +44,11 @@ import java.io.Serializable;
import java.util.*; import java.util.*;
public class VariantContextUtils { public class VariantContextUtils {
public final static String MERGE_INTERSECTION = "Intersection";
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
public final static String MERGE_FILTER_PREFIX = "filterIn";
final public static JexlEngine engine = new JexlEngine(); final public static JexlEngine engine = new JexlEngine();
static { static {
engine.setSilent(false); // will throw errors now for selects that don't evaluate properly engine.setSilent(false); // will throw errors now for selects that don't evaluate properly
@ -154,6 +159,13 @@ public class VariantContextUtils {
return "%." + precision + "f"; return "%." + precision + "f";
} }
public static Genotype removePLs(Genotype g) {
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
}
/** /**
* A simple but common wrapper for matching VariantContext objects using JEXL expressions * A simple but common wrapper for matching VariantContext objects using JEXL expressions
*/ */
@ -609,19 +621,20 @@ public class VariantContextUtils {
if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
filters.clear(); filters.clear();
if ( annotateOrigin ) { // we care about where the call came from if ( annotateOrigin ) { // we care about where the call came from
String setValue; String setValue;
if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
setValue = "Intersection"; setValue = MERGE_INTERSECTION;
else if ( nFiltered == VCs.size() ) // everything was filtered out else if ( nFiltered == VCs.size() ) // everything was filtered out
setValue = "FilteredInAll"; setValue = MERGE_FILTER_IN_ALL;
else if ( variantSources.isEmpty() ) // everyone was reference else if ( variantSources.isEmpty() ) // everyone was reference
setValue = "ReferenceInAll"; setValue = MERGE_REF_IN_ALL;
else { else {
LinkedHashSet<String> s = new LinkedHashSet<String>(); LinkedHashSet<String> s = new LinkedHashSet<String>();
for ( VariantContext vc : VCs ) for ( VariantContext vc : VCs )
if ( vc.isVariant() ) if ( vc.isVariant() )
s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() ); s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
setValue = Utils.join("-", s); setValue = Utils.join("-", s);
} }
@ -734,7 +747,7 @@ public class VariantContextUtils {
Map<String, Genotype> newGs = new HashMap<String, Genotype>(genotypes.size()); Map<String, Genotype> newGs = new HashMap<String, Genotype>(genotypes.size());
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) { for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) {
newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue()); newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue());
} }
return newGs; return newGs;

View File

@ -132,15 +132,21 @@ public abstract class BaseTest {
*/ */
public static class TestDataProvider { public static class TestDataProvider {
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>(); private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
private final String name;
/** /**
* Create a new TestDataProvider instance bound to the class variable C * Create a new TestDataProvider instance bound to the class variable C
* @param c * @param c
*/ */
public TestDataProvider(Class c) { public TestDataProvider(Class c, String name) {
if ( ! tests.containsKey(c) ) if ( ! tests.containsKey(c) )
tests.put(c, new ArrayList<Object>()); tests.put(c, new ArrayList<Object>());
tests.get(c).add(this); tests.get(c).add(this);
this.name = name;
}
public TestDataProvider(Class c) {
this(c, "");
} }
/** /**
@ -153,6 +159,11 @@ public abstract class BaseTest {
for ( Object x : tests.get(c) ) params2.add(new Object[]{x}); for ( Object x : tests.get(c) ) params2.add(new Object[]{x});
return params2.toArray(new Object[][]{}); return params2.toArray(new Object[][]{});
} }
@Override
public String toString() {
return "TestDataProvider("+name+")";
}
} }
/** /**

View File

@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
1, 1,
Arrays.asList("6645babc8c7d46be0da223477c7b1291")); Arrays.asList("3008d6f5044bc14801e5c58d985dec72"));
executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec);
} }
} }

View File

@ -21,37 +21,25 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
// our package
package org.broadinstitute.sting.utils.variantcontext; package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeSuite; import org.testng.annotations.BeforeSuite;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
public class VariantContextUtilsUnitTest extends BaseTest { public class VariantContextUtilsUnitTest extends BaseTest {
Allele Aref, T, delRef, ATC; Allele Aref, T, C, delRef, ATC, ATCATC;
Genotype ref1, snp1, snp2, indel1, indelref;
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3;
@BeforeSuite @BeforeSuite
public void setup() { public void setup() {
@ -68,28 +56,35 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Aref = Allele.create("A", true); Aref = Allele.create("A", true);
delRef = Allele.create("-", true); delRef = Allele.create("-", true);
T = Allele.create("T"); T = Allele.create("T");
C = Allele.create("C");
ATC = Allele.create("ATC"); ATC = Allele.create("ATC");
ATCATC = Allele.create("ATCATC");
}
ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); private Genotype makeG(String sample, Allele a1, Allele a2) {
snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); return new Genotype(sample, Arrays.asList(a1, a2));
snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); }
indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30});
indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30});
refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) {
snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); return new Genotype(sample, Arrays.asList(a1, a2), log10pError);
snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2));
snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1));
snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2));
indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref));
indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1));
indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1));
} }
private VariantContext makeVC(String source, List<Allele> alleles) { private VariantContext makeVC(String source, List<Allele> alleles) {
return makeVC(source, alleles, null, null); return makeVC(source, alleles, null, null);
} }
private VariantContext makeVC(String source, List<Allele> alleles, Genotype... g1) {
return makeVC(source, alleles, Arrays.asList(g1));
}
private VariantContext makeVC(String source, List<Allele> alleles, String filter) {
return makeVC(source, alleles, filter.equals(".") ? null : new HashSet<String>(Arrays.asList(filter)));
}
private VariantContext makeVC(String source, List<Allele> alleles, Set<String> filters) {
return makeVC(source, alleles, null, filters);
}
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes) { private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes) {
return makeVC(source, alleles, genotypes, null); return makeVC(source, alleles, genotypes, null);
} }
@ -98,45 +93,108 @@ public class VariantContextUtilsUnitTest extends BaseTest {
int start = 10; int start = 10;
int stop = start; // alleles.contains(ATC) ? start + 3 : start; int stop = start; // alleles.contains(ATC) ? start + 3 : start;
return new VariantContext(source, "1", start, stop, alleles, return new VariantContext(source, "1", start, stop, alleles,
VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes), genotypes == null ? null : VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes),
1.0, filters, null, (byte)'C'); 1.0, filters, null, (byte)'C');
} }
private class SimpleMergeTest extends TestDataProvider { // --------------------------------------------------------------------------------
List<VariantContext> inputVCs; //
VariantContext expectedVC; // Test allele merging
//
// --------------------------------------------------------------------------------
private SimpleMergeTest(VariantContext... vcsArg) { private class MergeAllelesTest extends TestDataProvider {
super(SimpleMergeTest.class); List<List<Allele>> inputs;
LinkedList<VariantContext> allVCs = new LinkedList<VariantContext>(Arrays.asList(vcsArg)); List<Allele> expected;
expectedVC = allVCs.pollLast();
inputVCs = allVCs; private MergeAllelesTest(List<Allele>... arg) {
super(MergeAllelesTest.class);
LinkedList<List<Allele>> all = new LinkedList<List<Allele>>(Arrays.asList(arg));
expected = all.pollLast();
inputs = all;
} }
public String toString() { public String toString() {
return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC); return String.format("MergeAllelesTest input=%s expected=%s", inputs, expected);
} }
} }
@DataProvider(name = "mergeAlleles")
@DataProvider(name = "simplemergedata") public Object[][] mergeAllelesData() {
public Object[][] createSimpleMergeData() {
// first, do no harm // first, do no harm
new SimpleMergeTest(refVC, refVC); new MergeAllelesTest(Arrays.asList(Aref),
new SimpleMergeTest(snpVC1, snpVC1); Arrays.asList(Aref));
new SimpleMergeTest(indelVC1, indelVC1);
new SimpleMergeTest(indelVC3, indelVC3);
new SimpleMergeTest(refVC, snpVC1, snpVC3); new MergeAllelesTest(Arrays.asList(Aref),
new SimpleMergeTest(snpVC1, snpVC2, snpVC2); Arrays.asList(Aref),
new SimpleMergeTest(refVC, snpVC2, snpVC4); Arrays.asList(Aref));
new SimpleMergeTest(indelVC1, indelVC2, indelVC3); new MergeAllelesTest(Arrays.asList(Aref),
new SimpleMergeTest(indelVC1, indelVC3, indelVC3); Arrays.asList(Aref, T),
new SimpleMergeTest(indelVC2, indelVC3, indelVC3); Arrays.asList(Aref, T));
return SimpleMergeTest.getTests(SimpleMergeTest.class); new MergeAllelesTest(Arrays.asList(Aref, C),
Arrays.asList(Aref, T),
Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
new MergeAllelesTest(Arrays.asList(Aref, C, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, T, C),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef)); // todo -- FIXME me GdA
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATC));
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
return MergeAllelesTest.getTests(MergeAllelesTest.class);
} }
@Test(dataProvider = "mergeAlleles")
public void testMergeAlleles(MergeAllelesTest cfg) {
final List<VariantContext> inputs = new ArrayList<VariantContext>();
int i = 0;
for ( final List<Allele> alleles : cfg.inputs ) {
final String name = "vcf" + ++i;
inputs.add(makeVC(name, alleles));
}
final List<String> priority = vcs2priority(inputs);
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
inputs, priority,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false);
Assert.assertEquals(merged.getAlleles(), cfg.expected);
}
// --------------------------------------------------------------------------------
//
// Test rsID merging
//
// --------------------------------------------------------------------------------
private class SimpleMergeRSIDTest extends TestDataProvider { private class SimpleMergeRSIDTest extends TestDataProvider {
List<String> inputs; List<String> inputs;
String expected; String expected;
@ -156,10 +214,13 @@ public class VariantContextUtilsUnitTest extends BaseTest {
@DataProvider(name = "simplemergersiddata") @DataProvider(name = "simplemergersiddata")
public Object[][] createSimpleMergeRSIDData() { public Object[][] createSimpleMergeRSIDData() {
new SimpleMergeRSIDTest(".", "."); new SimpleMergeRSIDTest(".", ".");
new SimpleMergeRSIDTest(".", ".", ".");
new SimpleMergeRSIDTest("rs1", "rs1"); new SimpleMergeRSIDTest("rs1", "rs1");
new SimpleMergeRSIDTest("rs1", "rs1", "rs1");
new SimpleMergeRSIDTest(".", "rs1", "rs1"); new SimpleMergeRSIDTest(".", "rs1", "rs1");
new SimpleMergeRSIDTest("rs1", ".", "rs1"); new SimpleMergeRSIDTest("rs1", ".", "rs1");
new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2"); new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2");
new SimpleMergeRSIDTest("rs1", "rs2", "rs1", "rs1,rs2"); // duplicates
new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1");
new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1");
new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1");
@ -171,32 +232,312 @@ public class VariantContextUtilsUnitTest extends BaseTest {
@Test(dataProvider = "simplemergersiddata") @Test(dataProvider = "simplemergersiddata")
public void testRSIDMerge(SimpleMergeRSIDTest cfg) { public void testRSIDMerge(SimpleMergeRSIDTest cfg) {
List<VariantContext> inputs = new ArrayList<VariantContext>(); final VariantContext snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T));
for ( String id : cfg.inputs ) { final List<VariantContext> inputs = new ArrayList<VariantContext>();
for ( final String id : cfg.inputs ) {
MutableVariantContext vc = new MutableVariantContext(snpVC1); MutableVariantContext vc = new MutableVariantContext(snpVC1);
if ( ! id.equals(".") ) vc.setID(id); if ( ! id.equals(".") ) vc.setID(id);
inputs.add(vc); inputs.add(vc);
} }
VariantContext merged = myMerge(inputs); final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
inputs, null,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false);
Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected);
} }
private VariantContext myMerge(List<VariantContext> inputs) { // --------------------------------------------------------------------------------
List<String> priority = new ArrayList<String>(); //
for ( VariantContext vc : inputs ) priority.add(vc.getSource()); // Test filtered merging
//
// --------------------------------------------------------------------------------
return VariantContextUtils.simpleMerge(genomeLocParser, private class MergeFilteredTest extends TestDataProvider {
inputs, priority, List<VariantContext> inputs;
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContext expected;
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); String setExpected;
VariantContextUtils.FilteredRecordMergeType type;
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) {
this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected);
}
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) {
super(MergeFilteredTest.class, name);
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(input1, input2));
this.expected = expected;
this.type = type;
inputs = all;
this.setExpected = setExpected;
}
public String toString() {
return String.format("%s input=%s expected=%s", super.toString(), inputs, expected);
}
} }
// todo -- add tests for subset merging, especially with correct PLs @DataProvider(name = "mergeFiltered")
// todo -- test priority list public Object[][] mergeFilteredData() {
// todo -- test FilteredRecordMergeType new MergeFilteredTest("AllPass",
// todo -- no annotate origin makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
// todo -- test set key makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
// todo -- test filtered are uncalled makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("noFilters",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("oneFiltered",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("onePassOneFail",
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("AllFiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.MERGE_FILTER_IN_ALL);
// test ALL vs. ANY
new MergeFilteredTest("FailOneUnfiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("OneFailAllUnfilteredArg",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
// test excluding allele in filtered record
new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
// promotion of site from unfiltered to PASSES
new MergeFilteredTest("UnfilteredPlusPassIsPass",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("RefInAll",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_REF_IN_ALL);
new MergeFilteredTest("RefInOne",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
"2");
return MergeFilteredTest.getTests(MergeFilteredTest.class);
}
@Test(dataProvider = "mergeFiltered")
public void testMergeFiltered(MergeFilteredTest cfg) {
final List<String> priority = vcs2priority(cfg.inputs);
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
// test alleles are equal
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
// test set field
Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected);
// test filter field
Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters());
}
// --------------------------------------------------------------------------------
//
// Test genotype merging
//
// --------------------------------------------------------------------------------
private class MergeGenotypesTest extends TestDataProvider {
List<VariantContext> inputs;
VariantContext expected;
List<String> priority;
private MergeGenotypesTest(String name, String priority, VariantContext... arg) {
super(MergeGenotypesTest.class, name);
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(arg));
this.expected = all.pollLast();
inputs = all;
this.priority = Arrays.asList(priority.split(","));
}
public String toString() {
return String.format("%s input=%s expected=%s", super.toString(), inputs, expected);
}
}
@DataProvider(name = "mergeGenotypes")
public Object[][] mergeGenotypesData() {
new MergeGenotypesTest("TakeGenotypeByPriority-1,2", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)));
new MergeGenotypesTest("TakeGenotypeByPriority-1,2-nocall", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)));
new MergeGenotypesTest("TakeGenotypeByPriority-2,1", "2,1",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)));
new MergeGenotypesTest("NonOverlappingGenotypes", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, T, 2)));
new MergeGenotypesTest("PreserveNoCall", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1), makeG("s2", Aref, T, 2)));
new MergeGenotypesTest("PerserveAlleles", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)),
makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2)));
new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s3", Aref, T, 3)));
new MergeGenotypesTest("TakeGenotypePartialOverlap-2,1", "2,1",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)));
// todo -- GDA -- add tests for merging correct PLs
return MergeGenotypesTest.getTests(MergeGenotypesTest.class);
}
@Test(dataProvider = "mergeGenotypes")
public void testMergeGenotypes(MergeGenotypesTest cfg) {
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
// test alleles are equal
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
// test genotypes
assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes());
}
// necessary to not overload equals for genotypes
private void assertGenotypesAreMostlyEqual(Map<String, Genotype> actual, Map<String, Genotype> expected) {
if (actual == expected) {
return;
}
if (actual == null || expected == null) {
Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual);
}
if (actual.size() != expected.size()) {
Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size());
}
for (Map.Entry<String, Genotype> entry : actual.entrySet()) {
String key = entry.getKey();
Genotype value = entry.getValue();
Genotype expectedValue = expected.get(key);
Assert.assertEquals(value.alleles, expectedValue.alleles);
Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError());
Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods());
if ( value.hasLikelihoods() )
Assert.assertEquals(value.getLikelihoods(), expectedValue.getLikelihoods());
}
}
@Test
public void testMergeGenotypesUniquify() {
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
// test genotypes
Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
}
@Test(expectedExceptions = UserException.class)
public void testMergeGenotypesRequireUnique() {
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false);
}
// --------------------------------------------------------------------------------
//
// Misc. tests
//
// --------------------------------------------------------------------------------
@Test
public void testAnnotationSet() {
for ( final boolean annotate : Arrays.asList(true, false)) {
for ( final String set : Arrays.asList("set", "combine", "x")) {
final List<String> priority = Arrays.asList("1", "2");
VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false);
if ( annotate )
Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION);
else
Assert.assertFalse(merged.hasAttribute(set));
}
}
}
private static final List<String> vcs2priority(final Collection<VariantContext> vcs) {
final List<String> priority = new ArrayList<String>();
for ( final VariantContext vc : vcs ) {
priority.add(vc.getSource());
}
return priority;
}
} }