Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Matt Hanna 2011-09-23 14:53:28 -04:00
commit e388c357ca
42 changed files with 889 additions and 685 deletions

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
* @author Your Name * @author Your Name
* @since Date created * @since Date created
*/ */
@Hidden
public class GATKDocsExample extends RodWalker<Integer, Integer> { public class GATKDocsExample extends RodWalker<Integer, Integer> {
/** /**
* Put detailed documentation about the argument here. No need to duplicate the summary information * Put detailed documentation about the argument here. No need to duplicate the summary information

View File

@ -43,6 +43,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* The allele balance (fraction of ref bases over ref + alt bases) across all bialleleic het-called samples
*/
public class AlleleBalance extends InfoFieldAnnotation { public class AlleleBalance extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/**
* The allele balance (fraction of ref bases over ref + alt bases) separately for each bialleleic het-called sample
*/
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {

View File

@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.Map; import java.util.Map;
/**
* Abstract base class for all annotations that are normalized by depth
*/
public abstract class AnnotationByDepth extends InfoFieldAnnotation { public abstract class AnnotationByDepth extends InfoFieldAnnotation {

View File

@ -47,6 +47,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Count of A, C, G, T bases across all samples
*/
public class BaseCounts extends InfoFieldAnnotation { public class BaseCounts extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -13,6 +13,9 @@ import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele)
*/
public class BaseQualityRankSumTest extends RankSumTest { public class BaseQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); } public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }

View File

@ -44,6 +44,11 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Allele count in genotypes, for each ALT allele, in the same order as listed;
* allele Frequency, for each ALT allele, in the same order as listed; total number
* of alleles in called genotypes.
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation { public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };

View File

@ -16,7 +16,12 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Total (unfiltered) depth over all samples.
*
* Affected by downsampling (-dcov) though, so the max value one can obtain for N samples with -dcov D
* is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation { public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -23,6 +23,25 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* The depth of coverage of each VCF allele in this sample
*
* Complementary fields that two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
* because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not base
* assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what
* determine the genotype calls (see below).
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static String REF_ALLELE = "REF"; private static String REF_ALLELE = "REF";

View File

@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/**
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
* being seen on only the forward or only the reverse strand) in the reads? More bias is
* indicative of false positive calls.
*/
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation { public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
private static final String FS = "FS"; private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320; private static final double MIN_PVALUE = 1E-320;

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
*/
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/**
* Consistency of the site with two (and only two) segregating haplotypes. Higher scores
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
*/
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation { public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
private final static boolean DEBUG = false; private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10; private final static int MIN_CONTEXT_WING_SIZE = 10;

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium
*/
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation { public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
private static final int MIN_SAMPLES = 10; private static final int MIN_SAMPLES = 10;

View File

@ -16,7 +16,9 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Largest contiguous homopolymer run of the variant allele in either direction on the reference.
*/
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation { public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
private boolean ANNOTATE_INDELS = true; private boolean ANNOTATE_INDELS = true;

View File

@ -17,14 +17,15 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 5/16/11
*/
// A set of annotations calculated directly from the GLs /**
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation { * Likelihood-based (using PL field) test for the inbreeding among samples.
*
* A continuous generalization of the Hardy-Weinberg test for disequilibrium that works
* well with limited coverage per sample. See the 1000 Genomes Phase I release for
* more information.
*/
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
private static final int MIN_SAMPLES = 10; private static final int MIN_SAMPLES = 10;

View File

@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/** /**
* Created by IntelliJ IDEA. * Rough category of indel type (insertion, deletion, multi-allelic, other)
* User: delangel
* Date: Mar 11, 2011
* Time: 11:47:33 AM
* To change this template use File | Settings | File Templates.
*/ */
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Triplet annotation: fraction of MAQP == 0, MAPQ < 10, and count of all mapped reads
*/
public class LowMQ extends InfoFieldAnnotation { public class LowMQ extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -14,6 +14,9 @@ import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
*/
public class MappingQualityRankSumTest extends RankSumTest { public class MappingQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); } public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Total count across all samples of mapping quality zero reads
*/
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -44,11 +44,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * Count for each sample of mapping quality zero reads
* User: asivache
* Date: Feb 4, 2011
* Time: 6:46:25 PM
* To change this template use File | Settings | File Templates.
*/ */
public class MappingQualityZeroBySample extends GenotypeAnnotation { public class MappingQualityZeroBySample extends GenotypeAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, public Map<String, Object> annotate(RefMetaDataTracker tracker,

View File

@ -17,8 +17,9 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Fraction of all reads across samples that have mapping quality zero
*/
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -17,11 +17,8 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * The number of N bases, counting only SOLiD data
* User: rpoplin
* Date: 5/16/11
*/ */
public class NBaseCount extends InfoFieldAnnotation { public class NBaseCount extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if( stratifiedContexts.size() == 0 ) if( stratifiedContexts.size() == 0 )

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator; package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -15,7 +16,11 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth.
*
* Low scores are indicative of false positive calls and artifacts.
*/
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation { public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,6 +21,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Root Mean Square of the mapping quality of the reads across all samples.
*/
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation { public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,7 +21,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Abstract root for all RankSum based annotations
*/
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation { public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final double INDEL_LIKELIHOOD_THRESH = 0.1;
static final boolean DEBUG = false; static final boolean DEBUG = false;

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator; package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -47,12 +48,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * Unsupported
* User: asivache
* Date: Feb 4, 2011
* Time: 3:59:27 PM
* To change this template use File | Settings | File Templates.
*/ */
@Hidden
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation { public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
private static String REF_ALLELE = "REF"; private static String REF_ALLELE = "REF";

View File

@ -19,11 +19,8 @@ import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
/** /**
* Created by IntelliJ IDEA. * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
* User: rpoplin
* Date: 3/30/11
*/ */
public class ReadPosRankSumTest extends RankSumTest { public class ReadPosRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); } public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }

View File

@ -15,8 +15,9 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* SB annotation value by depth of alt containing samples
*/
public class SBByDepth extends AnnotationByDepth { public class SBByDepth extends AnnotationByDepth {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -41,7 +41,9 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
/**
* List all of the samples in the info field
*/
public class SampleList extends InfoFieldAnnotation { public class SampleList extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/**
* Fraction of reads containing spanning deletions at this site.
*/
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator; package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -19,12 +20,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
/** /**
* Created by IntelliJ IDEA. * Counts of bases from SLX, 454, and SOLiD at this site
* User: delangel
* Date: 6/29/11
* Time: 3:14 PM
* To change this template use File | Settings | File Templates.
*/ */
@Hidden
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation { public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
private String nSLX = "NumSLX"; private String nSLX = "NumSLX";
private String n454 ="Num454"; private String n454 ="Num454";

View File

@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// code for testing purposes // code for testing purposes
// //
private final static boolean DEBUG = false; private final static boolean DEBUG = false;
private final static boolean PRINT_LIKELIHOODS = false;
private final static int N_CYCLES = 1;
private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded");
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
private final static boolean COMPARE_TO_GS = false;
public enum ExactCalculation {
N2_GOLD_STANDARD,
LINEAR_EXPERIMENTAL
}
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
private boolean SIMPLE_GREEDY_GENOTYPER = false;
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
final private ExactCalculation calcToUse;
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter); super(UAC, N, logger, verboseWriter);
calcToUse = UAC.EXACT_CALCULATION_TYPE;
} }
public void getLog10PNonRef(RefMetaDataTracker tracker, public void getLog10PNonRef(RefMetaDataTracker tracker,
@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
Map<String, Genotype> GLs, Set<Allele>alleles, Map<String, Genotype> GLs, Set<Allele>alleles,
double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) { double[] log10AlleleFrequencyPosteriors) {
// todo -- REMOVE ME AFTER TESTING final int numAlleles = alleles.size();
// todo -- REMOVE ME AFTER TESTING final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
// todo -- REMOVE ME AFTER TESTING final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
double[] gsPosteriors;
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
int idxAA = GenotypeType.AA.ordinal();
int idxAB = GenotypeType.AB.ordinal();
int idxBB = GenotypeType.BB.ordinal();
// todo -- remove me after testing
if ( N_CYCLES > 1 ) {
for ( int i = 0; i < N_CYCLES; i++) {
timerGS.restart();
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB);
timerGS.stop();
timerExpt.restart();
linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
timerExpt.stop();
}
System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n",
timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime());
}
int lastK = -1;
int numAlleles = alleles.size();
int idxDiag = numAlleles; int idxDiag = numAlleles;
int incr = numAlleles - 1; int incr = numAlleles - 1;
double[][] posteriorCache = new double[numAlleles-1][];
double[] bestAFguess = new double[numAlleles-1];
for (int k=1; k < numAlleles; k++) { for (int k=1; k < numAlleles; k++) {
// multi-allelic approximation, part 1: Ideally // multi-allelic approximation, part 1: Ideally
// for each alt allele compute marginal (suboptimal) posteriors - // for each alt allele compute marginal (suboptimal) posteriors -
@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC. // So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
idxAA = 0; final int idxAA = 0;
idxAB = k; final int idxAB = k;
// yy is always element on the diagonal. // yy is always element on the diagonal.
// 2 alleles: BBelement 2 // 2 alleles: BBelement 2
// 3 alleles: BB element 3. CC element 5 // 3 alleles: BB element 3. CC element 5
// 4 alleles: // 4 alleles:
idxBB = idxDiag; final int idxBB = idxDiag;
idxDiag += incr--; idxDiag += incr--;
// todo - possible cleanup final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
switch ( calcToUse ) {
case N2_GOLD_STANDARD:
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
case LINEAR_EXPERIMENTAL:
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
}
if (numAlleles > 2) { if (numAlleles > 2) {
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone(); posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors); bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
@ -153,39 +100,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]); log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
} }
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
if ( COMPARE_TO_GS ) {
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB);
double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0;
if ( ! eq || PRINT_LIKELIHOODS ) {
System.out.printf("----------------------------------------%n");
for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
double x = log10AlleleFrequencyPosteriors[k];
System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k],
log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
}
System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar);
}
}
} }
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) { private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(); ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
//int j = 0;
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.values() ) { for ( Genotype sample : GLs.values() ) {
if ( sample.hasLikelihoods() ) { if ( sample.hasLikelihoods() ) {
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
double[] gls = sample.getLikelihoods().getAsVector(); double[] gls = sample.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL) if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
@ -240,84 +162,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
} }
} }
// now with banding
public int linearExactBanded(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
throw new NotImplementedException();
// final int numSamples = GLs.size();
// final int numChr = 2*numSamples;
// final double[][] genotypeLikelihoods = getGLs(GLs);
//
// final ExactACCache logY = new ExactACCache(numSamples+1);
// logY.getkMinus0()[0] = 0.0; // the zero case
//
// double maxLog10L = Double.NEGATIVE_INFINITY;
// boolean done = false;
// int lastK = -1;
// final int BAND_SIZE = 10;
//
// for (int k=0; k <= numChr && ! done; k++ ) {
// final double[] kMinus0 = logY.getkMinus0();
// int jStart = Math.max(k - BAND_SIZE, 1);
// int jStop = Math.min(k + BAND_SIZE, numSamples);
//
// if ( k == 0 ) { // special case for k = 0
// for ( int j=1; j <= numSamples; j++ ) {
// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
// }
// } else { // k > 0
// final double[] kMinus1 = logY.getkMinus1();
// final double[] kMinus2 = logY.getkMinus2();
// Arrays.fill(kMinus0,0);
//
// for ( int j = jStart; j <= jStop; j++ ) {
// final double[] gl = genotypeLikelihoods[j];
// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
//
// double aa = Double.NEGATIVE_INFINITY;
// double ab = Double.NEGATIVE_INFINITY;
// if (k < 2*j-1)
// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
//
// if (k < 2*j)
// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
//
// double log10Max;
// if (k > 1) {
// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
// log10Max = approximateLog10SumLog10(aa, ab, bb);
// } else {
// // we know we aren't considering the BB case, so we can use an optimized log10 function
// log10Max = approximateLog10SumLog10(aa, ab);
// }
//
// // finally, update the L(j,k) value
// kMinus0[j] = log10Max - logDenominator;
//
// String offset = Utils.dupString(' ',k);
// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]);
// }
// }
//
// // update the posteriors vector
// final double log10LofK = kMinus0[jStop];
// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
//
// // can we abort early?
// lastK = k;
// maxLog10L = Math.max(maxLog10L, log10LofK);
// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
// done = true;
// }
//
// logY.rotate();
// }
//
// return lastK;
}
public int linearExact(Map<String, Genotype> GLs, public int linearExact(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
@ -605,82 +449,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
return calls; return calls;
} }
// -------------------------------------------------------------------------------------
//
// Gold standard, but O(N^2), implementation.
//
// TODO -- remove me for clarity in this code
//
// -------------------------------------------------------------------------------------
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
int numSamples = GLs.size();
int numChr = 2*numSamples;
double[][] logYMatrix = new double[1+numSamples][1+numChr];
for (int i=0; i <=numSamples; i++)
for (int j=0; j <=numChr; j++)
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
//YMatrix[0][0] = 1.0;
logYMatrix[0][0] = 0.0;
int j=0;
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
j++;
if ( !sample.getValue().hasLikelihoods() )
continue;
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
//double logDenominator = Math.log10(2.0*j*(2.0*j-1));
double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
// special treatment for k=0: iteration reduces to:
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA];
for (int k=1; k <= 2*j; k++ ) {
//double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
double logNumerator[];
logNumerator = new double[3];
if (k < 2*j-1)
logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
genotypeLikelihoods[idxAA];
else
logNumerator[0] = Double.NEGATIVE_INFINITY;
if (k < 2*j)
logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
genotypeLikelihoods[idxAB];
else
logNumerator[1] = Double.NEGATIVE_INFINITY;
if (k > 1)
logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
genotypeLikelihoods[idxBB];
else
logNumerator[2] = Double.NEGATIVE_INFINITY;
double logNum = MathUtils.softMax(logNumerator);
//YMatrix[j][k] = num/den;
logYMatrix[j][k] = logNum - logDenominator;
}
}
for (int k=0; k <= numChr; k++)
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
return numChr;
}
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) { private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
int j = logYMatrix.length - 1; int j = logYMatrix.length - 1;
System.out.printf("-----------------------------------%n"); System.out.printf("-----------------------------------%n");
@ -689,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior); System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
} }
} }
} }

View File

@ -168,10 +168,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false) @Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
public boolean GSA_PRODUCTION_ONLY = false; public boolean GSA_PRODUCTION_ONLY = false;
@Hidden
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
@Hidden @Hidden
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false; public boolean IGNORE_SNP_ALLELES = false;
@ -191,7 +187,6 @@ public class UnifiedArgumentCollection {
uac.GLmodel = GLmodel; uac.GLmodel = GLmodel;
uac.AFmodel = AFmodel; uac.AFmodel = AFmodel;
uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE;
uac.heterozygosity = heterozygosity; uac.heterozygosity = heterozygosity;
uac.PCR_error = PCR_error; uac.PCR_error = PCR_error;
uac.GenotypingMode = GenotypingMode; uac.GenotypingMode = GenotypingMode;

View File

@ -42,6 +42,7 @@ import java.util.*;
* *
* <p>Body test</p> * <p>Body test</p>
*/ */
@Hidden
public class DocumentationTest extends RodWalker<Integer, Integer> { public class DocumentationTest extends RodWalker<Integer, Integer> {
// the docs for the arguments are in the collection // the docs for the arguments are in the collection
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

View File

@ -432,25 +432,37 @@ public class ClippingOp {
} }
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) { private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
int shift = 0; int newShift = 0;
int oldShift = 0;
int deletionShift = 0;
// Rewind to previous start (by counting everything that was already clipped in this read)
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (!cigarElement.getOperator().consumesReferenceBases())
shift -= cigarElement.getLength();
else
break;
}
// Advance to new start (by counting everything new that has been clipped )
for (CigarElement cigarElement : newCigar.getCigarElements()) { for (CigarElement cigarElement : newCigar.getCigarElements()) {
if (!cigarElement.getOperator().consumesReferenceBases()) if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
shift += cigarElement.getLength(); newShift += cigarElement.getLength();
else else
break; break;
} }
return shift; for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
else
break;
}
int basesClipped = 0;
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (basesClipped > newShift) // are we beyond the clipped region?
break;
else if (cigarElement.getOperator() == CigarOperator.DELETION) // if this is a deletion, we have to adjust the starting shift
deletionShift += cigarElement.getLength();
else
basesClipped += cigarElement.getLength();
}
return newShift - oldShift + deletionShift;
} }
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) { private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
@ -462,8 +474,8 @@ public class ClippingOp {
return -clippedLength; return -clippedLength;
} }
if (cigarElement.getOperator() == CigarOperator.DELETION) // if (cigarElement.getOperator() == CigarOperator.DELETION)
return cigarElement.getLength(); // return cigarElement.getLength();
return 0; return 0;
} }

View File

@ -71,21 +71,21 @@ public class ReadClipper {
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read)) if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
// TODO add check in the Hardclip function if ( start > stop ) {
if ( start > stop ) // stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
}
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into //This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read //an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
stop -= numDeletions(read); // stop -= numDeletions(read);
if ( start > stop ) // if ( start > stop )
start -= numDeletions(read); // start -= numDeletions(read);
//System.out.println("Clipping start/stop: " + start + "/" + stop); //System.out.println("Clipping start/stop: " + start + "/" + stop);
@ -145,7 +145,7 @@ public class ReadClipper {
cutLeft = readIndex + cigarElement.getLength() - 1; cutLeft = readIndex + cigarElement.getLength() - 1;
} }
} }
else else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
rightTail = true; rightTail = true;
if (cigarElement.getOperator().consumesReadBases()) if (cigarElement.getOperator().consumesReadBases())

View File

@ -66,7 +66,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
public boolean includeInDocs(ClassDoc doc) { public boolean includeInDocs(ClassDoc doc) {
try { try {
Class type = HelpUtils.getClassForDoc(doc); Class type = HelpUtils.getClassForDoc(doc);
return JVMUtils.isConcrete(type); boolean hidden = ! getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class);
return ! hidden && JVMUtils.isConcrete(type);
} catch ( ClassNotFoundException e ) { } catch ( ClassNotFoundException e ) {
return false; return false;
} }

View File

@ -74,13 +74,6 @@ public class Genotype {
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased()); return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
} }
public static Genotype removePLs(Genotype g) {
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
}
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) { public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased()); return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
} }

View File

@ -44,6 +44,11 @@ import java.io.Serializable;
import java.util.*; import java.util.*;
public class VariantContextUtils { public class VariantContextUtils {
public final static String MERGE_INTERSECTION = "Intersection";
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
public final static String MERGE_FILTER_PREFIX = "filterIn";
final public static JexlEngine engine = new JexlEngine(); final public static JexlEngine engine = new JexlEngine();
static { static {
engine.setSilent(false); // will throw errors now for selects that don't evaluate properly engine.setSilent(false); // will throw errors now for selects that don't evaluate properly
@ -154,6 +159,13 @@ public class VariantContextUtils {
return "%." + precision + "f"; return "%." + precision + "f";
} }
public static Genotype removePLs(Genotype g) {
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
}
/** /**
* A simple but common wrapper for matching VariantContext objects using JEXL expressions * A simple but common wrapper for matching VariantContext objects using JEXL expressions
*/ */
@ -609,19 +621,20 @@ public class VariantContextUtils {
if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() ) if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
filters.clear(); filters.clear();
if ( annotateOrigin ) { // we care about where the call came from if ( annotateOrigin ) { // we care about where the call came from
String setValue; String setValue;
if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
setValue = "Intersection"; setValue = MERGE_INTERSECTION;
else if ( nFiltered == VCs.size() ) // everything was filtered out else if ( nFiltered == VCs.size() ) // everything was filtered out
setValue = "FilteredInAll"; setValue = MERGE_FILTER_IN_ALL;
else if ( variantSources.isEmpty() ) // everyone was reference else if ( variantSources.isEmpty() ) // everyone was reference
setValue = "ReferenceInAll"; setValue = MERGE_REF_IN_ALL;
else { else {
LinkedHashSet<String> s = new LinkedHashSet<String>(); LinkedHashSet<String> s = new LinkedHashSet<String>();
for ( VariantContext vc : VCs ) for ( VariantContext vc : VCs )
if ( vc.isVariant() ) if ( vc.isVariant() )
s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() ); s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
setValue = Utils.join("-", s); setValue = Utils.join("-", s);
} }
@ -734,7 +747,7 @@ public class VariantContextUtils {
Map<String, Genotype> newGs = new HashMap<String, Genotype>(genotypes.size()); Map<String, Genotype> newGs = new HashMap<String, Genotype>(genotypes.size());
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) { for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) {
newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue()); newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue());
} }
return newGs; return newGs;

View File

@ -132,15 +132,21 @@ public abstract class BaseTest {
*/ */
public static class TestDataProvider { public static class TestDataProvider {
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>(); private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
private final String name;
/** /**
* Create a new TestDataProvider instance bound to the class variable C * Create a new TestDataProvider instance bound to the class variable C
* @param c * @param c
*/ */
public TestDataProvider(Class c) { public TestDataProvider(Class c, String name) {
if ( ! tests.containsKey(c) ) if ( ! tests.containsKey(c) )
tests.put(c, new ArrayList<Object>()); tests.put(c, new ArrayList<Object>());
tests.get(c).add(this); tests.get(c).add(this);
this.name = name;
}
public TestDataProvider(Class c) {
this(c, "");
} }
/** /**
@ -153,6 +159,11 @@ public abstract class BaseTest {
for ( Object x : tests.get(c) ) params2.add(new Object[]{x}); for ( Object x : tests.get(c) ) params2.add(new Object[]{x});
return params2.toArray(new Object[][]{}); return params2.toArray(new Object[][]{});
} }
@Override
public String toString() {
return "TestDataProvider("+name+")";
}
} }
/** /**

View File

@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"), baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
1, 1,
Arrays.asList("6645babc8c7d46be0da223477c7b1291")); Arrays.asList("3008d6f5044bc14801e5c58d985dec72"));
executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec); executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec);
} }
} }

View File

@ -21,37 +21,25 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
// our package
package org.broadinstitute.sting.utils.variantcontext; package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeSuite; import org.testng.annotations.BeforeSuite;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.util.*; import java.util.*;
public class VariantContextUtilsUnitTest extends BaseTest { public class VariantContextUtilsUnitTest extends BaseTest {
Allele Aref, T, delRef, ATC; Allele Aref, T, C, delRef, ATC, ATCATC;
Genotype ref1, snp1, snp2, indel1, indelref;
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3;
@BeforeSuite @BeforeSuite
public void setup() { public void setup() {
@ -68,28 +56,35 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Aref = Allele.create("A", true); Aref = Allele.create("A", true);
delRef = Allele.create("-", true); delRef = Allele.create("-", true);
T = Allele.create("T"); T = Allele.create("T");
C = Allele.create("C");
ATC = Allele.create("ATC"); ATC = Allele.create("ATC");
ATCATC = Allele.create("ATCATC");
}
ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10}); private Genotype makeG(String sample, Allele a1, Allele a2) {
snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20}); return new Genotype(sample, Arrays.asList(a1, a2));
snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0}); }
indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30});
indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30});
refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1)); private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) {
snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1)); return new Genotype(sample, Arrays.asList(a1, a2), log10pError);
snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2));
snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1));
snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2));
indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref));
indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1));
indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1));
} }
private VariantContext makeVC(String source, List<Allele> alleles) { private VariantContext makeVC(String source, List<Allele> alleles) {
return makeVC(source, alleles, null, null); return makeVC(source, alleles, null, null);
} }
private VariantContext makeVC(String source, List<Allele> alleles, Genotype... g1) {
return makeVC(source, alleles, Arrays.asList(g1));
}
private VariantContext makeVC(String source, List<Allele> alleles, String filter) {
return makeVC(source, alleles, filter.equals(".") ? null : new HashSet<String>(Arrays.asList(filter)));
}
private VariantContext makeVC(String source, List<Allele> alleles, Set<String> filters) {
return makeVC(source, alleles, null, filters);
}
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes) { private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes) {
return makeVC(source, alleles, genotypes, null); return makeVC(source, alleles, genotypes, null);
} }
@ -98,45 +93,108 @@ public class VariantContextUtilsUnitTest extends BaseTest {
int start = 10; int start = 10;
int stop = start; // alleles.contains(ATC) ? start + 3 : start; int stop = start; // alleles.contains(ATC) ? start + 3 : start;
return new VariantContext(source, "1", start, stop, alleles, return new VariantContext(source, "1", start, stop, alleles,
VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes), genotypes == null ? null : VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes),
1.0, filters, null, (byte)'C'); 1.0, filters, null, (byte)'C');
} }
private class SimpleMergeTest extends TestDataProvider { // --------------------------------------------------------------------------------
List<VariantContext> inputVCs; //
VariantContext expectedVC; // Test allele merging
//
// --------------------------------------------------------------------------------
private SimpleMergeTest(VariantContext... vcsArg) { private class MergeAllelesTest extends TestDataProvider {
super(SimpleMergeTest.class); List<List<Allele>> inputs;
LinkedList<VariantContext> allVCs = new LinkedList<VariantContext>(Arrays.asList(vcsArg)); List<Allele> expected;
expectedVC = allVCs.pollLast();
inputVCs = allVCs; private MergeAllelesTest(List<Allele>... arg) {
super(MergeAllelesTest.class);
LinkedList<List<Allele>> all = new LinkedList<List<Allele>>(Arrays.asList(arg));
expected = all.pollLast();
inputs = all;
} }
public String toString() { public String toString() {
return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC); return String.format("MergeAllelesTest input=%s expected=%s", inputs, expected);
} }
} }
@DataProvider(name = "mergeAlleles")
@DataProvider(name = "simplemergedata") public Object[][] mergeAllelesData() {
public Object[][] createSimpleMergeData() {
// first, do no harm // first, do no harm
new SimpleMergeTest(refVC, refVC); new MergeAllelesTest(Arrays.asList(Aref),
new SimpleMergeTest(snpVC1, snpVC1); Arrays.asList(Aref));
new SimpleMergeTest(indelVC1, indelVC1);
new SimpleMergeTest(indelVC3, indelVC3);
new SimpleMergeTest(refVC, snpVC1, snpVC3); new MergeAllelesTest(Arrays.asList(Aref),
new SimpleMergeTest(snpVC1, snpVC2, snpVC2); Arrays.asList(Aref),
new SimpleMergeTest(refVC, snpVC2, snpVC4); Arrays.asList(Aref));
new SimpleMergeTest(indelVC1, indelVC2, indelVC3); new MergeAllelesTest(Arrays.asList(Aref),
new SimpleMergeTest(indelVC1, indelVC3, indelVC3); Arrays.asList(Aref, T),
new SimpleMergeTest(indelVC2, indelVC3, indelVC3); Arrays.asList(Aref, T));
return SimpleMergeTest.getTests(SimpleMergeTest.class); new MergeAllelesTest(Arrays.asList(Aref, C),
Arrays.asList(Aref, T),
Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
new MergeAllelesTest(Arrays.asList(Aref, C, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, T, C),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef)); // todo -- FIXME me GdA
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATC));
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
return MergeAllelesTest.getTests(MergeAllelesTest.class);
} }
@Test(dataProvider = "mergeAlleles")
public void testMergeAlleles(MergeAllelesTest cfg) {
final List<VariantContext> inputs = new ArrayList<VariantContext>();
int i = 0;
for ( final List<Allele> alleles : cfg.inputs ) {
final String name = "vcf" + ++i;
inputs.add(makeVC(name, alleles));
}
final List<String> priority = vcs2priority(inputs);
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
inputs, priority,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false);
Assert.assertEquals(merged.getAlleles(), cfg.expected);
}
// --------------------------------------------------------------------------------
//
// Test rsID merging
//
// --------------------------------------------------------------------------------
private class SimpleMergeRSIDTest extends TestDataProvider { private class SimpleMergeRSIDTest extends TestDataProvider {
List<String> inputs; List<String> inputs;
String expected; String expected;
@ -156,10 +214,13 @@ public class VariantContextUtilsUnitTest extends BaseTest {
@DataProvider(name = "simplemergersiddata") @DataProvider(name = "simplemergersiddata")
public Object[][] createSimpleMergeRSIDData() { public Object[][] createSimpleMergeRSIDData() {
new SimpleMergeRSIDTest(".", "."); new SimpleMergeRSIDTest(".", ".");
new SimpleMergeRSIDTest(".", ".", ".");
new SimpleMergeRSIDTest("rs1", "rs1"); new SimpleMergeRSIDTest("rs1", "rs1");
new SimpleMergeRSIDTest("rs1", "rs1", "rs1");
new SimpleMergeRSIDTest(".", "rs1", "rs1"); new SimpleMergeRSIDTest(".", "rs1", "rs1");
new SimpleMergeRSIDTest("rs1", ".", "rs1"); new SimpleMergeRSIDTest("rs1", ".", "rs1");
new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2"); new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2");
new SimpleMergeRSIDTest("rs1", "rs2", "rs1", "rs1,rs2"); // duplicates
new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1");
new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1");
new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1"); new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1");
@ -171,32 +232,312 @@ public class VariantContextUtilsUnitTest extends BaseTest {
@Test(dataProvider = "simplemergersiddata") @Test(dataProvider = "simplemergersiddata")
public void testRSIDMerge(SimpleMergeRSIDTest cfg) { public void testRSIDMerge(SimpleMergeRSIDTest cfg) {
List<VariantContext> inputs = new ArrayList<VariantContext>(); final VariantContext snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T));
for ( String id : cfg.inputs ) { final List<VariantContext> inputs = new ArrayList<VariantContext>();
for ( final String id : cfg.inputs ) {
MutableVariantContext vc = new MutableVariantContext(snpVC1); MutableVariantContext vc = new MutableVariantContext(snpVC1);
if ( ! id.equals(".") ) vc.setID(id); if ( ! id.equals(".") ) vc.setID(id);
inputs.add(vc); inputs.add(vc);
} }
VariantContext merged = myMerge(inputs); final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
inputs, null,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false);
Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected); Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected);
} }
private VariantContext myMerge(List<VariantContext> inputs) { // --------------------------------------------------------------------------------
List<String> priority = new ArrayList<String>(); //
for ( VariantContext vc : inputs ) priority.add(vc.getSource()); // Test filtered merging
//
// --------------------------------------------------------------------------------
return VariantContextUtils.simpleMerge(genomeLocParser, private class MergeFilteredTest extends TestDataProvider {
inputs, priority, List<VariantContext> inputs;
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContext expected;
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false); String setExpected;
VariantContextUtils.FilteredRecordMergeType type;
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) {
this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected);
}
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) {
super(MergeFilteredTest.class, name);
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(input1, input2));
this.expected = expected;
this.type = type;
inputs = all;
this.setExpected = setExpected;
}
public String toString() {
return String.format("%s input=%s expected=%s", super.toString(), inputs, expected);
}
} }
// todo -- add tests for subset merging, especially with correct PLs @DataProvider(name = "mergeFiltered")
// todo -- test priority list public Object[][] mergeFilteredData() {
// todo -- test FilteredRecordMergeType new MergeFilteredTest("AllPass",
// todo -- no annotate origin makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
// todo -- test set key makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
// todo -- test filtered are uncalled makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("noFilters",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("oneFiltered",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("onePassOneFail",
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("AllFiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.MERGE_FILTER_IN_ALL);
// test ALL vs. ANY
new MergeFilteredTest("FailOneUnfiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("OneFailAllUnfilteredArg",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
// test excluding allele in filtered record
new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
// promotion of site from unfiltered to PASSES
new MergeFilteredTest("UnfilteredPlusPassIsPass",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("RefInAll",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_REF_IN_ALL);
new MergeFilteredTest("RefInOne",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
"2");
return MergeFilteredTest.getTests(MergeFilteredTest.class);
}
/**
 * Runs simpleMerge over each MergeFilteredTest case (prioritized by input
 * source order) and checks the merged alleles, the "set" attribute, and the
 * merged filter field against the expectations.
 */
@Test(dataProvider = "mergeFiltered")
public void testMergeFiltered(MergeFilteredTest cfg) {
    final List<String> sourcePriority = vcs2priority(cfg.inputs);
    final VariantContext result = VariantContextUtils.simpleMerge(genomeLocParser,
            cfg.inputs, sourcePriority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);

    // merged alleles
    Assert.assertEquals(result.getAlleles(), cfg.expected.getAlleles());

    // "set" annotation
    Assert.assertEquals(result.getAttribute("set"), cfg.setExpected);

    // merged filter field
    Assert.assertEquals(result.getFilters(), cfg.expected.getFilters());
}
// --------------------------------------------------------------------------------
//
// Test genotype merging
//
// --------------------------------------------------------------------------------
/**
 * One genotype-merge test case. The varargs hold the input contexts followed
 * by the expected merged context; priority is a comma-separated source list.
 */
private class MergeGenotypesTest extends TestDataProvider {
    List<VariantContext> inputs;
    VariantContext expected;
    List<String> priority;

    private MergeGenotypesTest(String name, String priority, VariantContext... arg) {
        super(MergeGenotypesTest.class, name);
        // Split off the last vararg as the expectation; everything before it is input.
        final LinkedList<VariantContext> contexts = new LinkedList<VariantContext>(Arrays.asList(arg));
        expected = contexts.pollLast();
        inputs = contexts;
        this.priority = Arrays.asList(priority.split(","));
    }

    public String toString() {
        return super.toString() + " input=" + inputs + " expected=" + expected;
    }
}
/**
 * Data provider for genotype merging: priority ordering, no-call preservation,
 * disjoint sample sets, per-sample allele preservation, and partial overlap
 * between the inputs' sample sets.
 */
@DataProvider(name = "mergeGenotypes")
public Object[][] mergeGenotypesData() {
    // With priority 1,2 sample s1 must come from context "1"
    new MergeGenotypesTest("TakeGenotypeByPriority-1,2", "1,2",
            makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
            makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
            makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)));

    // A no-call in the highest-priority context still wins over a called genotype
    new MergeGenotypesTest("TakeGenotypeByPriority-1,2-nocall", "1,2",
            makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
            makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
            makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)));

    // Reversed priority picks s1 from context "2" instead
    new MergeGenotypesTest("TakeGenotypeByPriority-2,1", "2,1",
            makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
            makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
            makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)));

    // Disjoint sample sets: both genotypes appear in the merge
    new MergeGenotypesTest("NonOverlappingGenotypes", "1,2",
            makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
            makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
            makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, T, 2)));

    // No-calls survive merging alongside called samples
    new MergeGenotypesTest("PreserveNoCall", "1,2",
            makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
            makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
            makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1), makeG("s2", Aref, T, 2)));

    // Different alt alleles per input: merged site carries the union (A,C,T)
    new MergeGenotypesTest("PerserveAlleles", "1,2",
            makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
            makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)),
            makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2)));

    // s1 present in both inputs (taken by priority), s3 only in "2"
    new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2",
            makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
            makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
            makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s3", Aref, T, 3)));

    // Same overlap with reversed priority: s1 now comes from "2"
    new MergeGenotypesTest("TakeGenotypePartialOverlap-2,1", "2,1",
            makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
            makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
            makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)));

    // todo -- GDA -- add tests for merging correct PLs

    return MergeGenotypesTest.getTests(MergeGenotypesTest.class);
}
/**
 * Merges each MergeGenotypesTest case with PRIORITIZE genotype merging and
 * checks the merged alleles plus a field-by-field genotype comparison.
 */
@Test(dataProvider = "mergeGenotypes")
public void testMergeGenotypes(MergeGenotypesTest cfg) {
    final VariantContext result = VariantContextUtils.simpleMerge(genomeLocParser,
            cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
            VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);

    // merged site-level alleles
    Assert.assertEquals(result.getAlleles(), cfg.expected.getAlleles());

    // per-sample genotypes, compared field-by-field
    assertGenotypesAreMostlyEqual(result.getGenotypes(), cfg.expected.getGenotypes());
}
/**
 * Compares two sample-to-genotype maps field-by-field (alleles, error,
 * likelihoods). Exists because we deliberately do not overload
 * Genotype.equals; fails via TestNG asserts with informative messages.
 */
private void assertGenotypesAreMostlyEqual(Map<String, Genotype> actual, Map<String, Genotype> expected) {
    if (actual == expected) {
        return;
    }

    if (actual == null || expected == null) {
        Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual);
    }

    if (actual.size() != expected.size()) {
        Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size());
    }

    for (Map.Entry<String, Genotype> entry : actual.entrySet()) {
        String key = entry.getKey();
        Genotype value = entry.getValue();
        Genotype expectedValue = expected.get(key);
        // Equal sizes do not guarantee equal key sets: fail with a clear
        // message instead of the NPE the field accesses below would throw.
        Assert.assertNotNull(expectedValue, "No expected genotype for sample " + key);

        Assert.assertEquals(value.alleles, expectedValue.alleles);
        Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError());
        Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods());
        if ( value.hasLikelihoods() )
            Assert.assertEquals(value.getLikelihoods(), expectedValue.getLikelihoods());
    }
}
/**
 * UNIQUIFY merging of two contexts that share sample "s1" must rename the
 * genotypes with per-input suffixes ("s1.1" and "s1.2").
 */
@Test
public void testMergeGenotypesUniquify() {
    final VariantContext first = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
    final VariantContext second = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));
    final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
            Arrays.asList(first, second), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
            VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);

    // both uniquified sample names, and nothing else, are present
    final Set<String> expectedSamples = new HashSet<String>(Arrays.asList("s1.1", "s1.2"));
    Assert.assertEquals(merged.getGenotypes().keySet(), expectedSamples);
}
/**
 * REQUIRE_UNIQUE genotype merging must reject inputs that share a sample
 * name; the merge call itself is expected to throw a UserException.
 */
@Test(expectedExceptions = UserException.class)
public void testMergeGenotypesRequireUnique() {
    final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
    final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));

    // Both inputs carry sample "s1", so this call must throw; the return
    // value is irrelevant (previously assigned to an unused local).
    VariantContextUtils.simpleMerge(genomeLocParser,
            Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
            VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false);
}
// --------------------------------------------------------------------------------
//
// Misc. tests
//
// --------------------------------------------------------------------------------
/**
 * The "set" attribute is written under the caller-supplied key when
 * annotation is enabled, and omitted entirely when it is disabled,
 * regardless of the key name used.
 */
@Test
public void testAnnotationSet() {
    // Loop-invariant: the same two-entry priority list applies to every case.
    final List<String> priority = Arrays.asList("1", "2");

    for ( final boolean annotate : Arrays.asList(true, false)) {
        for ( final String set : Arrays.asList("set", "combine", "x")) {
            final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
            final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
            final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
                    Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
                    VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false);

            if ( annotate ) {
                // both records pass, so the set annotation is "Intersection"
                Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION);
            } else {
                Assert.assertFalse(merged.hasAttribute(set));
            }
        }
    }
}
/**
 * Builds a priority list from the source names of the given contexts, in
 * iteration order.
 *
 * Note: the redundant {@code final} modifier was dropped — private static
 * methods cannot be overridden.
 */
private static List<String> vcs2priority(final Collection<VariantContext> vcs) {
    final List<String> priority = new ArrayList<String>();
    for ( final VariantContext vc : vcs ) {
        priority.add(vc.getSource());
    }
    return priority;
}
}