Merge branch 'master' of ssh://gsa3.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
77ba59e30a
|
|
@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
||||||
*/
|
*/
|
||||||
public FilePointer next() {
|
public FilePointer next() {
|
||||||
FilePointer current = wrappedIterator.next();
|
FilePointer current = wrappedIterator.next();
|
||||||
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
|
while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
|
||||||
current = current.combine(parser,wrappedIterator.next());
|
current = current.combine(parser,wrappedIterator.next());
|
||||||
return current;
|
return current;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
* @author Your Name
|
* @author Your Name
|
||||||
* @since Date created
|
* @since Date created
|
||||||
*/
|
*/
|
||||||
|
@Hidden
|
||||||
public class GATKDocsExample extends RodWalker<Integer, Integer> {
|
public class GATKDocsExample extends RodWalker<Integer, Integer> {
|
||||||
/**
|
/**
|
||||||
* Put detailed documentation about the argument here. No need to duplicate the summary information
|
* Put detailed documentation about the argument here. No need to duplicate the summary information
|
||||||
|
|
|
||||||
|
|
@ -24,12 +24,14 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.report;
|
package org.broadinstitute.sting.gatk.report;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tracks a linked list of GATKReportColumn in order by name.
|
* Tracks a linked list of GATKReportColumn in order by name.
|
||||||
*/
|
*/
|
||||||
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
|
||||||
private List<String> columnNames = new ArrayList<String>();
|
private List<String> columnNames = new ArrayList<String>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
||||||
columnNames.add(key);
|
columnNames.add(key);
|
||||||
return super.put(key, value);
|
return super.put(key, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<GATKReportColumn> iterator() {
|
||||||
|
return new Iterator<GATKReportColumn>() {
|
||||||
|
int offset = 0;
|
||||||
|
public boolean hasNext() { return offset < columnNames.size() ; }
|
||||||
|
public GATKReportColumn next() { return getByIndex(offset++); }
|
||||||
|
public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -286,6 +286,10 @@ public class GATKReportTable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean containsKey(Object primaryKey) {
|
||||||
|
return primaryKeyColumn.contains(primaryKey);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the value for a given position in the table
|
* Set the value for a given position in the table
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The allele balance (fraction of ref bases over ref + alt bases) across all bialleleic het-called samples
|
||||||
|
*/
|
||||||
public class AlleleBalance extends InfoFieldAnnotation {
|
public class AlleleBalance extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The allele balance (fraction of ref bases over ref + alt bases) separately for each bialleleic het-called sample
|
||||||
|
*/
|
||||||
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract base class for all annotations that are normalized by depth
|
||||||
|
*/
|
||||||
public abstract class AnnotationByDepth extends InfoFieldAnnotation {
|
public abstract class AnnotationByDepth extends InfoFieldAnnotation {
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count of A, C, G, T bases across all samples
|
||||||
|
*/
|
||||||
public class BaseCounts extends InfoFieldAnnotation {
|
public class BaseCounts extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,9 @@ import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele)
|
||||||
|
*/
|
||||||
public class BaseQualityRankSumTest extends RankSumTest {
|
public class BaseQualityRankSumTest extends RankSumTest {
|
||||||
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
|
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,11 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allele count in genotypes, for each ALT allele, in the same order as listed;
|
||||||
|
* allele Frequency, for each ALT allele, in the same order as listed; total number
|
||||||
|
* of alleles in called genotypes.
|
||||||
|
*/
|
||||||
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
|
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,23 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Total (unfiltered) depth over all samples.
|
||||||
|
*
|
||||||
|
* This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
|
||||||
|
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
|
||||||
|
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
|
||||||
|
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
|
||||||
|
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
|
||||||
|
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
|
||||||
|
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
|
||||||
|
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
|
||||||
|
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
|
||||||
|
* normally be excluded from the statistical calculations going into GQ and QUAL.
|
||||||
|
*
|
||||||
|
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
|
||||||
|
* -dcov D is N * D
|
||||||
|
*/
|
||||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
|
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,25 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The depth of coverage of each VCF allele in this sample.
|
||||||
|
*
|
||||||
|
* This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
|
||||||
|
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
|
||||||
|
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
|
||||||
|
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
|
||||||
|
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
|
||||||
|
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
|
||||||
|
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
|
||||||
|
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
|
||||||
|
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
|
||||||
|
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
|
||||||
|
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
|
||||||
|
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
|
||||||
|
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
|
||||||
|
* assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what
|
||||||
|
* determine the genotype calls (see below).
|
||||||
|
*/
|
||||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private static String REF_ALLELE = "REF";
|
private static String REF_ALLELE = "REF";
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
|
||||||
|
* being seen on only the forward or only the reverse strand) in the reads? More bias is
|
||||||
|
* indicative of false positive calls.
|
||||||
|
*/
|
||||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
|
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
private static final String FS = "FS";
|
private static final String FS = "FS";
|
||||||
private static final double MIN_PVALUE = 1E-320;
|
private static final double MIN_PVALUE = 1E-320;
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
|
||||||
|
*/
|
||||||
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -34,12 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||||
|
|
@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Consistency of the site with two (and only two) segregating haplotypes. Higher scores
|
||||||
|
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
|
||||||
|
*/
|
||||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium
|
||||||
|
*/
|
||||||
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
|
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
|
||||||
|
|
||||||
private static final int MIN_SAMPLES = 10;
|
private static final int MIN_SAMPLES = 10;
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Largest contiguous homopolymer run of the variant allele in either direction on the reference.
|
||||||
|
*/
|
||||||
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
|
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private boolean ANNOTATE_INDELS = true;
|
private boolean ANNOTATE_INDELS = true;
|
||||||
|
|
|
||||||
|
|
@ -17,14 +17,15 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: rpoplin
|
|
||||||
* Date: 5/16/11
|
|
||||||
*/
|
|
||||||
|
|
||||||
// A set of annotations calculated directly from the GLs
|
/**
|
||||||
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
|
* Likelihood-based (using PL field) test for the inbreeding among samples.
|
||||||
|
*
|
||||||
|
* A continuous generalization of the Hardy-Weinberg test for disequilibrium that works
|
||||||
|
* well with limited coverage per sample. See the 1000 Genomes Phase I release for
|
||||||
|
* more information.
|
||||||
|
*/
|
||||||
|
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private static final int MIN_SAMPLES = 10;
|
private static final int MIN_SAMPLES = 10;
|
||||||
|
|
||||||
|
|
@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Rough category of indel type (insertion, deletion, multi-allelic, other)
|
||||||
* User: delangel
|
|
||||||
* Date: Mar 11, 2011
|
|
||||||
* Time: 11:47:33 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Triplet annotation: fraction of MAQP == 0, MAPQ < 10, and count of all mapped reads
|
||||||
|
*/
|
||||||
public class LowMQ extends InfoFieldAnnotation {
|
public class LowMQ extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,9 @@ import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
|
||||||
|
*/
|
||||||
public class MappingQualityRankSumTest extends RankSumTest {
|
public class MappingQualityRankSumTest extends RankSumTest {
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }
|
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Total count across all samples of mapping quality zero reads
|
||||||
|
*/
|
||||||
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
|
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -44,11 +44,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Count for each sample of mapping quality zero reads
|
||||||
* User: asivache
|
|
||||||
* Date: Feb 4, 2011
|
|
||||||
* Time: 6:46:25 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(RefMetaDataTracker tracker,
|
||||||
|
|
|
||||||
|
|
@ -17,8 +17,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fraction of all reads across samples that have mapping quality zero
|
||||||
|
*/
|
||||||
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -17,11 +17,8 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* The number of N bases, counting only SOLiD data
|
||||||
* User: rpoplin
|
|
||||||
* Date: 5/16/11
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class NBaseCount extends InfoFieldAnnotation {
|
public class NBaseCount extends InfoFieldAnnotation {
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if( stratifiedContexts.size() == 0 )
|
if( stratifiedContexts.size() == 0 )
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -15,7 +16,11 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth.
|
||||||
|
*
|
||||||
|
* Low scores are indicative of false positive calls and artifacts.
|
||||||
|
*/
|
||||||
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
|
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Root Mean Square of the mapping quality of the reads across all samples.
|
||||||
|
*/
|
||||||
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
|
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract root for all RankSum based annotations
|
||||||
|
*/
|
||||||
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
|
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
||||||
static final boolean DEBUG = false;
|
static final boolean DEBUG = false;
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -47,12 +48,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Unsupported
|
||||||
* User: asivache
|
|
||||||
* Date: Feb 4, 2011
|
|
||||||
* Time: 3:59:27 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
|
@Hidden
|
||||||
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
|
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
|
||||||
|
|
||||||
private static String REF_ALLELE = "REF";
|
private static String REF_ALLELE = "REF";
|
||||||
|
|
|
||||||
|
|
@ -19,11 +19,8 @@ import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
|
||||||
* User: rpoplin
|
|
||||||
* Date: 3/30/11
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ReadPosRankSumTest extends RankSumTest {
|
public class ReadPosRankSumTest extends RankSumTest {
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }
|
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }
|
||||||
|
|
|
||||||
|
|
@ -15,8 +15,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SB annotation value by depth of alt containing samples
|
||||||
|
*/
|
||||||
public class SBByDepth extends AnnotationByDepth {
|
public class SBByDepth extends AnnotationByDepth {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List all of the samples in the info field
|
||||||
|
*/
|
||||||
public class SampleList extends InfoFieldAnnotation {
|
public class SampleList extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -82,7 +82,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
|
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
|
||||||
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
|
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
|
||||||
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
|
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
|
||||||
EXON_ID_KEY ("SNPEFF_EXON_ID", 7);
|
EXON_ID_KEY ("SNPEFF_EXON_ID", 7),
|
||||||
|
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1);
|
||||||
|
|
||||||
// Actual text of the key
|
// Actual text of the key
|
||||||
private final String keyName;
|
private final String keyName;
|
||||||
|
|
@ -108,48 +109,73 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
// Possible SnpEff biological effects. All effect names found in the SnpEff input file
|
// Possible SnpEff biological effects. All effect names found in the SnpEff input file
|
||||||
// are validated against this list.
|
// are validated against this list.
|
||||||
public enum EffectType {
|
public enum EffectType {
|
||||||
NONE,
|
// High-impact effects:
|
||||||
CHROMOSOME,
|
FRAME_SHIFT (EffectFunctionalClass.NONE, false),
|
||||||
INTERGENIC,
|
STOP_GAINED (EffectFunctionalClass.NONSENSE, false),
|
||||||
UPSTREAM,
|
START_LOST (EffectFunctionalClass.NONE, false),
|
||||||
UTR_5_PRIME,
|
SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false),
|
||||||
UTR_5_DELETED,
|
SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false),
|
||||||
START_GAINED,
|
EXON_DELETED (EffectFunctionalClass.NONE, false),
|
||||||
SPLICE_SITE_ACCEPTOR,
|
STOP_LOST (EffectFunctionalClass.NONE, false),
|
||||||
SPLICE_SITE_DONOR,
|
|
||||||
START_LOST,
|
// Moderate-impact effects:
|
||||||
SYNONYMOUS_START,
|
NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false),
|
||||||
NON_SYNONYMOUS_START,
|
CODON_CHANGE (EffectFunctionalClass.NONE, false),
|
||||||
CDS,
|
CODON_INSERTION (EffectFunctionalClass.NONE, false),
|
||||||
GENE,
|
CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false),
|
||||||
TRANSCRIPT,
|
CODON_DELETION (EffectFunctionalClass.NONE, false),
|
||||||
EXON,
|
CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false),
|
||||||
EXON_DELETED,
|
UTR_5_DELETED (EffectFunctionalClass.NONE, false),
|
||||||
NON_SYNONYMOUS_CODING,
|
UTR_3_DELETED (EffectFunctionalClass.NONE, false),
|
||||||
SYNONYMOUS_CODING,
|
|
||||||
FRAME_SHIFT,
|
// Low-impact effects:
|
||||||
CODON_CHANGE,
|
SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false),
|
||||||
CODON_INSERTION,
|
SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
|
||||||
CODON_CHANGE_PLUS_CODON_INSERTION,
|
NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
|
||||||
CODON_DELETION,
|
SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
|
||||||
CODON_CHANGE_PLUS_CODON_DELETION,
|
NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
|
||||||
STOP_GAINED,
|
START_GAINED (EffectFunctionalClass.NONE, false),
|
||||||
SYNONYMOUS_STOP,
|
|
||||||
NON_SYNONYMOUS_STOP,
|
// Modifiers:
|
||||||
STOP_LOST,
|
NONE (EffectFunctionalClass.NONE, true),
|
||||||
INTRON,
|
CHROMOSOME (EffectFunctionalClass.NONE, true),
|
||||||
UTR_3_PRIME,
|
INTERGENIC (EffectFunctionalClass.NONE, true),
|
||||||
UTR_3_DELETED,
|
UPSTREAM (EffectFunctionalClass.NONE, true),
|
||||||
DOWNSTREAM,
|
UTR_5_PRIME (EffectFunctionalClass.NONE, true),
|
||||||
INTRON_CONSERVED,
|
CDS (EffectFunctionalClass.NONE, true),
|
||||||
INTERGENIC_CONSERVED,
|
GENE (EffectFunctionalClass.NONE, true),
|
||||||
REGULATION,
|
TRANSCRIPT (EffectFunctionalClass.NONE, true),
|
||||||
CUSTOM,
|
EXON (EffectFunctionalClass.NONE, true),
|
||||||
WITHIN_NON_CODING_GENE
|
INTRON (EffectFunctionalClass.NONE, true),
|
||||||
|
UTR_3_PRIME (EffectFunctionalClass.NONE, true),
|
||||||
|
DOWNSTREAM (EffectFunctionalClass.NONE, true),
|
||||||
|
INTRON_CONSERVED (EffectFunctionalClass.NONE, true),
|
||||||
|
INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true),
|
||||||
|
REGULATION (EffectFunctionalClass.NONE, true),
|
||||||
|
CUSTOM (EffectFunctionalClass.NONE, true),
|
||||||
|
WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true);
|
||||||
|
|
||||||
|
private final EffectFunctionalClass functionalClass;
|
||||||
|
private final boolean isModifier;
|
||||||
|
|
||||||
|
EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) {
|
||||||
|
this.functionalClass = functionalClass;
|
||||||
|
this.isModifier = isModifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EffectFunctionalClass getFunctionalClass() {
|
||||||
|
return functionalClass;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isModifier() {
|
||||||
|
return isModifier;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact.
|
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of
|
||||||
|
// classifying some of the LOW impact effects as MODIFIERs.
|
||||||
public enum EffectImpact {
|
public enum EffectImpact {
|
||||||
|
MODIFIER (0),
|
||||||
LOW (1),
|
LOW (1),
|
||||||
MODERATE (2),
|
MODERATE (2),
|
||||||
HIGH (3);
|
HIGH (3);
|
||||||
|
|
@ -163,6 +189,10 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
public boolean isHigherImpactThan ( EffectImpact other ) {
|
public boolean isHigherImpactThan ( EffectImpact other ) {
|
||||||
return this.severityRating > other.severityRating;
|
return this.severityRating > other.severityRating;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isSameImpactAs ( EffectImpact other ) {
|
||||||
|
return this.severityRating == other.severityRating;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information.
|
// SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information.
|
||||||
|
|
@ -172,6 +202,24 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
UNKNOWN
|
UNKNOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We assign a functional class to each SnpEff effect.
|
||||||
|
public enum EffectFunctionalClass {
|
||||||
|
NONE (0),
|
||||||
|
SILENT (1),
|
||||||
|
MISSENSE (2),
|
||||||
|
NONSENSE (3);
|
||||||
|
|
||||||
|
private final int priority;
|
||||||
|
|
||||||
|
EffectFunctionalClass ( int priority ) {
|
||||||
|
this.priority = priority;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isHigherPriorityThan ( EffectFunctionalClass other ) {
|
||||||
|
return this.priority > other.priority;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
|
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
|
||||||
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
|
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
|
||||||
// without providing a SnpEff rod via --snpEffFile):
|
// without providing a SnpEff rod via --snpEffFile):
|
||||||
|
|
@ -336,7 +384,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
|
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
|
||||||
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
|
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
|
||||||
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
|
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
|
||||||
InfoFieldKey.EXON_ID_KEY.getKeyName()
|
InfoFieldKey.EXON_ID_KEY.getKeyName(),
|
||||||
|
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -349,7 +398,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
|
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
|
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
|
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
|
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
|
||||||
|
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -411,11 +461,16 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
if ( effect != null && effect.isModifier() ) {
|
||||||
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
|
impact = EffectImpact.MODIFIER;
|
||||||
}
|
}
|
||||||
catch ( IllegalArgumentException e ) {
|
else {
|
||||||
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
|
try {
|
||||||
|
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
|
||||||
|
}
|
||||||
|
catch ( IllegalArgumentException e ) {
|
||||||
|
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
|
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
|
||||||
|
|
@ -472,9 +527,17 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise, both effects are either in or not in a coding gene, so we compare the impacts
|
// Otherwise, both effects are either in or not in a coding gene, so we compare the impacts
|
||||||
// of the effects themselves:
|
// of the effects themselves. Effects with the same impact are tie-broken using the
|
||||||
|
// functional class of the effect:
|
||||||
|
|
||||||
return impact.isHigherImpactThan(other.impact);
|
if ( impact.isHigherImpactThan(other.impact) ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if ( impact.isSameImpactAs(other.impact) ) {
|
||||||
|
return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> getAnnotations() {
|
public Map<String, Object> getAnnotations() {
|
||||||
|
|
@ -488,6 +551,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
|
addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
|
||||||
addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
|
addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
|
||||||
addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
|
addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
|
||||||
|
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());
|
||||||
|
|
||||||
return annotations;
|
return annotations;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fraction of reads containing spanning deletions at this site.
|
||||||
|
*/
|
||||||
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
|
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -19,12 +20,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Counts of bases from SLX, 454, and SOLiD at this site
|
||||||
* User: delangel
|
|
||||||
* Date: 6/29/11
|
|
||||||
* Time: 3:14 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
|
@Hidden
|
||||||
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
private String nSLX = "NumSLX";
|
private String nSLX = "NumSLX";
|
||||||
private String n454 ="Num454";
|
private String n454 ="Num454";
|
||||||
|
|
|
||||||
|
|
@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||||
protected boolean indelsOnly = false;
|
protected boolean indelsOnly = false;
|
||||||
|
|
||||||
|
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
|
||||||
|
public String familyStr = null;
|
||||||
|
|
||||||
|
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
|
||||||
|
public double minGenotypeQualityP = 0.0;
|
||||||
|
|
||||||
private VariantAnnotatorEngine engine;
|
private VariantAnnotatorEngine engine;
|
||||||
|
|
||||||
private Collection<VariantContext> indelBufferContext;
|
private Collection<VariantContext> indelBufferContext;
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
* Time: 6:46:09 PM
|
* Time: 6:46:09 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public enum DiploidGenotype {
|
enum DiploidGenotype {
|
||||||
AA ('A', 'A'),
|
AA ('A', 'A'),
|
||||||
AC ('A', 'C'),
|
AC ('A', 'C'),
|
||||||
AG ('A', 'G'),
|
AG ('A', 'G'),
|
||||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
|
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
|
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
// code for testing purposes
|
// code for testing purposes
|
||||||
//
|
//
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static boolean PRINT_LIKELIHOODS = false;
|
|
||||||
private final static int N_CYCLES = 1;
|
|
||||||
private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded");
|
|
||||||
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
|
|
||||||
private final static boolean COMPARE_TO_GS = false;
|
|
||||||
|
|
||||||
public enum ExactCalculation {
|
|
||||||
N2_GOLD_STANDARD,
|
|
||||||
LINEAR_EXPERIMENTAL
|
|
||||||
}
|
|
||||||
|
|
||||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||||
|
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
|
||||||
private boolean SIMPLE_GREEDY_GENOTYPER = false;
|
|
||||||
|
|
||||||
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||||
|
|
||||||
final private ExactCalculation calcToUse;
|
|
||||||
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
calcToUse = UAC.EXACT_CALCULATION_TYPE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void getLog10PNonRef(RefMetaDataTracker tracker,
|
public void getLog10PNonRef(RefMetaDataTracker tracker,
|
||||||
|
|
@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
Map<String, Genotype> GLs, Set<Allele>alleles,
|
Map<String, Genotype> GLs, Set<Allele>alleles,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
final int numAlleles = alleles.size();
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
|
||||||
double[] gsPosteriors;
|
|
||||||
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
|
|
||||||
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
|
|
||||||
|
|
||||||
int idxAA = GenotypeType.AA.ordinal();
|
|
||||||
int idxAB = GenotypeType.AB.ordinal();
|
|
||||||
int idxBB = GenotypeType.BB.ordinal();
|
|
||||||
|
|
||||||
// todo -- remove me after testing
|
|
||||||
if ( N_CYCLES > 1 ) {
|
|
||||||
for ( int i = 0; i < N_CYCLES; i++) {
|
|
||||||
timerGS.restart();
|
|
||||||
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB);
|
|
||||||
timerGS.stop();
|
|
||||||
|
|
||||||
timerExpt.restart();
|
|
||||||
linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
|
|
||||||
timerExpt.stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n",
|
|
||||||
timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime());
|
|
||||||
}
|
|
||||||
|
|
||||||
int lastK = -1;
|
|
||||||
|
|
||||||
int numAlleles = alleles.size();
|
|
||||||
|
|
||||||
int idxDiag = numAlleles;
|
int idxDiag = numAlleles;
|
||||||
int incr = numAlleles - 1;
|
int incr = numAlleles - 1;
|
||||||
|
|
||||||
double[][] posteriorCache = new double[numAlleles-1][];
|
|
||||||
double[] bestAFguess = new double[numAlleles-1];
|
|
||||||
|
|
||||||
for (int k=1; k < numAlleles; k++) {
|
for (int k=1; k < numAlleles; k++) {
|
||||||
// multi-allelic approximation, part 1: Ideally
|
// multi-allelic approximation, part 1: Ideally
|
||||||
// for each alt allele compute marginal (suboptimal) posteriors -
|
// for each alt allele compute marginal (suboptimal) posteriors -
|
||||||
|
|
@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
|
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
|
||||||
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
|
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
|
||||||
|
|
||||||
idxAA = 0;
|
final int idxAA = 0;
|
||||||
idxAB = k;
|
final int idxAB = k;
|
||||||
// yy is always element on the diagonal.
|
// yy is always element on the diagonal.
|
||||||
// 2 alleles: BBelement 2
|
// 2 alleles: BBelement 2
|
||||||
// 3 alleles: BB element 3. CC element 5
|
// 3 alleles: BB element 3. CC element 5
|
||||||
// 4 alleles:
|
// 4 alleles:
|
||||||
idxBB = idxDiag;
|
final int idxBB = idxDiag;
|
||||||
idxDiag += incr--;
|
idxDiag += incr--;
|
||||||
|
|
||||||
// todo - possible cleanup
|
final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
||||||
switch ( calcToUse ) {
|
|
||||||
case N2_GOLD_STANDARD:
|
|
||||||
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
|
||||||
break;
|
|
||||||
case LINEAR_EXPERIMENTAL:
|
|
||||||
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (numAlleles > 2) {
|
if (numAlleles > 2) {
|
||||||
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
|
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
|
||||||
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
|
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
|
||||||
|
|
@ -153,39 +100,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
|
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
|
||||||
|
|
||||||
}
|
}
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
|
||||||
if ( COMPARE_TO_GS ) {
|
|
||||||
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB);
|
|
||||||
|
|
||||||
double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
|
|
||||||
double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
|
|
||||||
boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0;
|
|
||||||
|
|
||||||
if ( ! eq || PRINT_LIKELIHOODS ) {
|
|
||||||
System.out.printf("----------------------------------------%n");
|
|
||||||
for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
|
|
||||||
double x = log10AlleleFrequencyPosteriors[k];
|
|
||||||
System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
|
|
||||||
x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k],
|
|
||||||
log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
|
|
||||||
}
|
|
||||||
System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
|
|
||||||
ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
|
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
|
||||||
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
|
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
|
||||||
|
|
||||||
//int j = 0;
|
|
||||||
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||||
for ( Genotype sample : GLs.values() ) {
|
for ( Genotype sample : GLs.values() ) {
|
||||||
if ( sample.hasLikelihoods() ) {
|
if ( sample.hasLikelihoods() ) {
|
||||||
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
|
|
||||||
double[] gls = sample.getLikelihoods().getAsVector();
|
double[] gls = sample.getLikelihoods().getAsVector();
|
||||||
|
|
||||||
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
|
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
|
||||||
|
|
@ -240,84 +162,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// now with banding
|
|
||||||
public int linearExactBanded(Map<String, Genotype> GLs,
|
|
||||||
double[] log10AlleleFrequencyPriors,
|
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
|
||||||
throw new NotImplementedException();
|
|
||||||
// final int numSamples = GLs.size();
|
|
||||||
// final int numChr = 2*numSamples;
|
|
||||||
// final double[][] genotypeLikelihoods = getGLs(GLs);
|
|
||||||
//
|
|
||||||
// final ExactACCache logY = new ExactACCache(numSamples+1);
|
|
||||||
// logY.getkMinus0()[0] = 0.0; // the zero case
|
|
||||||
//
|
|
||||||
// double maxLog10L = Double.NEGATIVE_INFINITY;
|
|
||||||
// boolean done = false;
|
|
||||||
// int lastK = -1;
|
|
||||||
// final int BAND_SIZE = 10;
|
|
||||||
//
|
|
||||||
// for (int k=0; k <= numChr && ! done; k++ ) {
|
|
||||||
// final double[] kMinus0 = logY.getkMinus0();
|
|
||||||
// int jStart = Math.max(k - BAND_SIZE, 1);
|
|
||||||
// int jStop = Math.min(k + BAND_SIZE, numSamples);
|
|
||||||
//
|
|
||||||
// if ( k == 0 ) { // special case for k = 0
|
|
||||||
// for ( int j=1; j <= numSamples; j++ ) {
|
|
||||||
// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
|
|
||||||
// }
|
|
||||||
// } else { // k > 0
|
|
||||||
// final double[] kMinus1 = logY.getkMinus1();
|
|
||||||
// final double[] kMinus2 = logY.getkMinus2();
|
|
||||||
// Arrays.fill(kMinus0,0);
|
|
||||||
//
|
|
||||||
// for ( int j = jStart; j <= jStop; j++ ) {
|
|
||||||
// final double[] gl = genotypeLikelihoods[j];
|
|
||||||
// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
|
|
||||||
//
|
|
||||||
// double aa = Double.NEGATIVE_INFINITY;
|
|
||||||
// double ab = Double.NEGATIVE_INFINITY;
|
|
||||||
// if (k < 2*j-1)
|
|
||||||
// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
|
|
||||||
//
|
|
||||||
// if (k < 2*j)
|
|
||||||
// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
|
|
||||||
//
|
|
||||||
// double log10Max;
|
|
||||||
// if (k > 1) {
|
|
||||||
// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
|
|
||||||
// log10Max = approximateLog10SumLog10(aa, ab, bb);
|
|
||||||
// } else {
|
|
||||||
// // we know we aren't considering the BB case, so we can use an optimized log10 function
|
|
||||||
// log10Max = approximateLog10SumLog10(aa, ab);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // finally, update the L(j,k) value
|
|
||||||
// kMinus0[j] = log10Max - logDenominator;
|
|
||||||
//
|
|
||||||
// String offset = Utils.dupString(' ',k);
|
|
||||||
// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // update the posteriors vector
|
|
||||||
// final double log10LofK = kMinus0[jStop];
|
|
||||||
// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
|
|
||||||
//
|
|
||||||
// // can we abort early?
|
|
||||||
// lastK = k;
|
|
||||||
// maxLog10L = Math.max(maxLog10L, log10LofK);
|
|
||||||
// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
|
||||||
// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
|
||||||
// done = true;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// logY.rotate();
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return lastK;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int linearExact(Map<String, Genotype> GLs,
|
public int linearExact(Map<String, Genotype> GLs,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
||||||
|
|
@ -605,82 +449,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
return calls;
|
return calls;
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Gold standard, but O(N^2), implementation.
|
|
||||||
//
|
|
||||||
// TODO -- remove me for clarity in this code
|
|
||||||
//
|
|
||||||
// -------------------------------------------------------------------------------------
|
|
||||||
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
|
|
||||||
double[] log10AlleleFrequencyPriors,
|
|
||||||
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
|
||||||
int numSamples = GLs.size();
|
|
||||||
int numChr = 2*numSamples;
|
|
||||||
|
|
||||||
double[][] logYMatrix = new double[1+numSamples][1+numChr];
|
|
||||||
|
|
||||||
for (int i=0; i <=numSamples; i++)
|
|
||||||
for (int j=0; j <=numChr; j++)
|
|
||||||
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
//YMatrix[0][0] = 1.0;
|
|
||||||
logYMatrix[0][0] = 0.0;
|
|
||||||
int j=0;
|
|
||||||
|
|
||||||
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
|
|
||||||
j++;
|
|
||||||
|
|
||||||
if ( !sample.getValue().hasLikelihoods() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
|
|
||||||
double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
|
|
||||||
//double logDenominator = Math.log10(2.0*j*(2.0*j-1));
|
|
||||||
double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
|
||||||
|
|
||||||
// special treatment for k=0: iteration reduces to:
|
|
||||||
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA];
|
|
||||||
|
|
||||||
for (int k=1; k <= 2*j; k++ ) {
|
|
||||||
|
|
||||||
//double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
double logNumerator[];
|
|
||||||
logNumerator = new double[3];
|
|
||||||
if (k < 2*j-1)
|
|
||||||
logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
|
|
||||||
genotypeLikelihoods[idxAA];
|
|
||||||
else
|
|
||||||
logNumerator[0] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
|
|
||||||
if (k < 2*j)
|
|
||||||
logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
|
|
||||||
genotypeLikelihoods[idxAB];
|
|
||||||
else
|
|
||||||
logNumerator[1] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
if (k > 1)
|
|
||||||
logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
|
|
||||||
genotypeLikelihoods[idxBB];
|
|
||||||
else
|
|
||||||
logNumerator[2] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
double logNum = MathUtils.softMax(logNumerator);
|
|
||||||
|
|
||||||
//YMatrix[j][k] = num/den;
|
|
||||||
logYMatrix[j][k] = logNum - logDenominator;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int k=0; k <= numChr; k++)
|
|
||||||
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
|
||||||
|
|
||||||
return numChr;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
|
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
|
||||||
int j = logYMatrix.length - 1;
|
int j = logYMatrix.length - 1;
|
||||||
System.out.printf("-----------------------------------%n");
|
System.out.printf("-----------------------------------%n");
|
||||||
|
|
@ -689,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
|
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,10 +34,9 @@ import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
|
|
@ -396,8 +395,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
|
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
|
||||||
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
|
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
|
||||||
//System.out.println(eventLength);
|
//System.out.println(eventLength);
|
||||||
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
|
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
||||||
ref, hsize, numPrefBases);
|
ref, hsize, numPrefBases);
|
||||||
|
|
||||||
// For each sample, get genotype likelihoods based on pileup
|
// For each sample, get genotype likelihoods based on pileup
|
||||||
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
|
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
|
||||||
|
|
|
||||||
|
|
@ -31,9 +31,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
|
|
@ -122,8 +122,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
aList.add(refAllele);
|
aList.add(refAllele);
|
||||||
aList.add(altAllele);
|
aList.add(altAllele);
|
||||||
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
|
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
|
||||||
|
|
||||||
|
// normalize in log space so that max element is zero.
|
||||||
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
||||||
aList, dlike, getFilteredDepth(pileup)));
|
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
|
||||||
}
|
}
|
||||||
|
|
||||||
return refAllele;
|
return refAllele;
|
||||||
|
|
|
||||||
|
|
@ -168,10 +168,6 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
|
@Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
|
||||||
public boolean GSA_PRODUCTION_ONLY = false;
|
public boolean GSA_PRODUCTION_ONLY = false;
|
||||||
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
|
|
||||||
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
|
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
|
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
|
||||||
public boolean IGNORE_SNP_ALLELES = false;
|
public boolean IGNORE_SNP_ALLELES = false;
|
||||||
|
|
@ -191,7 +187,6 @@ public class UnifiedArgumentCollection {
|
||||||
|
|
||||||
uac.GLmodel = GLmodel;
|
uac.GLmodel = GLmodel;
|
||||||
uac.AFmodel = AFmodel;
|
uac.AFmodel = AFmodel;
|
||||||
uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE;
|
|
||||||
uac.heterozygosity = heterozygosity;
|
uac.heterozygosity = heterozygosity;
|
||||||
uac.PCR_error = PCR_error;
|
uac.PCR_error = PCR_error;
|
||||||
uac.GenotypingMode = GenotypingMode;
|
uac.GenotypingMode = GenotypingMode;
|
||||||
|
|
|
||||||
|
|
@ -544,6 +544,21 @@ public class UnifiedGenotyperEngine {
|
||||||
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final static double[] binomialProbabilityDepthCache = new double[10000];
|
||||||
|
static {
|
||||||
|
for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) {
|
||||||
|
binomialProbabilityDepthCache[i] = MathUtils.binomialProbability(0, i, 0.5);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final double getRefBinomialProb(final int depth) {
|
||||||
|
if ( depth < binomialProbabilityDepthCache.length )
|
||||||
|
return binomialProbabilityDepthCache[depth];
|
||||||
|
else
|
||||||
|
return MathUtils.binomialProbability(0, depth, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
|
private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
|
||||||
if ( contexts == null )
|
if ( contexts == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -567,7 +582,7 @@ public class UnifiedGenotyperEngine {
|
||||||
depth = context.getExtendedEventPileup().size();
|
depth = context.getExtendedEventPileup().size();
|
||||||
}
|
}
|
||||||
|
|
||||||
P_of_ref *= 1.0 - (theta / 2.0) * MathUtils.binomialProbability(0, depth, 0.5);
|
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
||||||
|
|
|
||||||
|
|
@ -26,9 +26,9 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
|
||||||
|
|
@ -29,8 +29,8 @@ import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -1041,20 +1041,14 @@ public class PairHMMIndelErrorModel {
|
||||||
double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
|
double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
|
||||||
|
|
||||||
int k=0;
|
int k=0;
|
||||||
double maxElement = Double.NEGATIVE_INFINITY;
|
|
||||||
for (int j=0; j < hSize; j++) {
|
for (int j=0; j < hSize; j++) {
|
||||||
for (int i=0; i <= j; i++){
|
for (int i=0; i <= j; i++){
|
||||||
genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
|
genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
|
||||||
if (haplotypeLikehoodMatrix[i][j] > maxElement)
|
|
||||||
maxElement = haplotypeLikehoodMatrix[i][j];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// renormalize
|
// renormalize so that max element is zero.
|
||||||
for (int i=0; i < genotypeLikelihoods.length; i++)
|
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
|
||||||
genotypeLikelihoods[i] -= maxElement;
|
|
||||||
|
|
||||||
return genotypeLikelihoods;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,7 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
* <p>Body test</p>
|
* <p>Body test</p>
|
||||||
*/
|
*/
|
||||||
|
@Hidden
|
||||||
public class DocumentationTest extends RodWalker<Integer, Integer> {
|
public class DocumentationTest extends RodWalker<Integer, Integer> {
|
||||||
// the docs for the arguments are in the collection
|
// the docs for the arguments are in the collection
|
||||||
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
|
|
||||||
|
|
@ -62,14 +62,17 @@ public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker t
|
||||||
annotationId++;
|
annotationId++;
|
||||||
} while (eval.hasAttribute(key));
|
} while (eval.hasAttribute(key));
|
||||||
|
|
||||||
} else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName() ) ) {
|
} else if ( eval.hasAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()) ) {
|
||||||
SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName()).toString());
|
try {
|
||||||
if ( snpEffType == SnpEff.EffectType.STOP_GAINED )
|
SnpEff.EffectFunctionalClass snpEffFunctionalClass = SnpEff.EffectFunctionalClass.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()).toString());
|
||||||
type = FunctionalType.nonsense;
|
if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.NONSENSE )
|
||||||
else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING )
|
type = FunctionalType.nonsense;
|
||||||
type = FunctionalType.missense;
|
else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.MISSENSE )
|
||||||
else if ( snpEffType == SnpEff.EffectType.SYNONYMOUS_CODING )
|
type = FunctionalType.missense;
|
||||||
type = FunctionalType.silent;
|
else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.SILENT )
|
||||||
|
type = FunctionalType.silent;
|
||||||
|
}
|
||||||
|
catch ( Exception e ) {} // don't error out if the type isn't supported
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( type != null ) {
|
if ( type != null ) {
|
||||||
|
|
|
||||||
|
|
@ -234,48 +234,17 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
||||||
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
List<VariantContext> preMergedVCs = new ArrayList<VariantContext>();
|
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||||
// iterate over the types so that it's deterministic
|
// iterate over the types so that it's deterministic
|
||||||
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
||||||
if ( VCsByType.containsKey(type) )
|
if ( VCsByType.containsKey(type) )
|
||||||
preMergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
|
||||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
||||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
|
||||||
}
|
}
|
||||||
|
|
||||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
for ( VariantContext mergedVC : mergedVCs ) {
|
||||||
// se have records merged but separated by type. If a particular record is for example a snp but all alleles are a subset of an existing mixed record,
|
|
||||||
// we will still merge those records.
|
|
||||||
if (preMergedVCs.size() > 1) {
|
|
||||||
for (VariantContext vc1 : preMergedVCs) {
|
|
||||||
VariantContext newvc = vc1;
|
|
||||||
boolean merged = false;
|
|
||||||
for (int k=0; k < mergedVCs.size(); k++) {
|
|
||||||
VariantContext vc2 = mergedVCs.get(k);
|
|
||||||
|
|
||||||
if (VariantContextUtils.allelesAreSubset(vc1,vc2) || VariantContextUtils.allelesAreSubset(vc2,vc1)) {
|
|
||||||
// all alleles of vc1 are contained in vc2 but they are of different type (say, vc1 is snp, vc2 is complex): try to merget v1 into v2
|
|
||||||
List<VariantContext> vcpair = new ArrayList<VariantContext>();
|
|
||||||
vcpair.add(vc1);
|
|
||||||
vcpair.add(vc2);
|
|
||||||
newvc = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcpair,
|
|
||||||
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
|
|
||||||
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
|
|
||||||
mergedVCs.set(k,newvc);
|
|
||||||
merged = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!merged)
|
|
||||||
mergedVCs.add(vc1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
mergedVCs = preMergedVCs;
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( VariantContext mergedVC : mergedVCs ) {
|
|
||||||
// only operate at the start of events
|
// only operate at the start of events
|
||||||
if ( mergedVC == null )
|
if ( mergedVC == null )
|
||||||
continue;
|
continue;
|
||||||
|
|
|
||||||
|
|
@ -452,7 +452,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||||
}
|
}
|
||||||
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
else if (!FAMILY_STRUCTURE.isEmpty()) {
|
||||||
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
|
||||||
|
|
|
||||||
|
|
@ -22,10 +22,9 @@
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
|
|
@ -444,11 +444,25 @@ public class MathUtils {
|
||||||
* @return a newly allocated array corresponding the normalized values in array, maybe log10 transformed
|
* @return a newly allocated array corresponding the normalized values in array, maybe log10 transformed
|
||||||
*/
|
*/
|
||||||
public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput) {
|
public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput) {
|
||||||
double[] normalized = new double[array.length];
|
return normalizeFromLog10(array, takeLog10OfOutput, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput, boolean keepInLogSpace) {
|
||||||
|
|
||||||
// for precision purposes, we need to add (or really subtract, since they're
|
// for precision purposes, we need to add (or really subtract, since they're
|
||||||
// all negative) the largest value; also, we need to convert to normal-space.
|
// all negative) the largest value; also, we need to convert to normal-space.
|
||||||
double maxValue = Utils.findMaxEntry(array);
|
double maxValue = Utils.findMaxEntry(array);
|
||||||
|
|
||||||
|
// we may decide to just normalize in log space with converting to linear space
|
||||||
|
if (keepInLogSpace) {
|
||||||
|
for (int i = 0; i < array.length; i++)
|
||||||
|
array[i] -= maxValue;
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
|
// default case: go to linear space
|
||||||
|
double[] normalized = new double[array.length];
|
||||||
|
|
||||||
for (int i = 0; i < array.length; i++)
|
for (int i = 0; i < array.length; i++)
|
||||||
normalized[i] = Math.pow(10, array[i] - maxValue);
|
normalized[i] = Math.pow(10, array[i] - maxValue);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,13 @@
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
|
|
@ -32,6 +34,9 @@ public class MendelianViolation {
|
||||||
|
|
||||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||||
|
|
||||||
|
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
|
||||||
|
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
|
||||||
|
|
||||||
|
|
||||||
public String getSampleMom() {
|
public String getSampleMom() {
|
||||||
return sampleMom;
|
return sampleMom;
|
||||||
|
|
@ -168,4 +173,41 @@ public class MendelianViolation {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the likelihood ratio for a mendelian violation
|
||||||
|
*/
|
||||||
|
public double violationLikelihoodRatio(VariantContext vc) {
|
||||||
|
double[] logLikAssignments = new double[27];
|
||||||
|
// the matrix to set up is
|
||||||
|
// MOM DAD CHILD
|
||||||
|
// |- AA
|
||||||
|
// AA AA | AB
|
||||||
|
// |- BB
|
||||||
|
// |- AA
|
||||||
|
// AA AB | AB
|
||||||
|
// |- BB
|
||||||
|
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
|
||||||
|
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
|
||||||
|
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
|
||||||
|
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
|
||||||
|
int offset = 0;
|
||||||
|
for ( int oMom = 0; oMom < 3; oMom++ ) {
|
||||||
|
for ( int oDad = 0; oDad < 3; oDad++ ) {
|
||||||
|
for ( int oChild = 0; oChild < 3; oChild ++ ) {
|
||||||
|
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
double[] mvLiks = new double[12];
|
||||||
|
double[] nonMVLiks = new double[15];
|
||||||
|
for ( int i = 0; i < 12; i ++ ) {
|
||||||
|
mvLiks[i] = logLikAssignments[mvOffsets[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( int i = 0; i < 15; i++) {
|
||||||
|
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
|
||||||
|
}
|
||||||
|
|
||||||
|
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -398,7 +398,9 @@ public class ClippingOp {
|
||||||
|
|
||||||
for (int i = 1; i <= 2; i++) {
|
for (int i = 1; i <= 2; i++) {
|
||||||
int shift = 0;
|
int shift = 0;
|
||||||
|
int totalHardClip = 0;
|
||||||
boolean readHasStarted = false;
|
boolean readHasStarted = false;
|
||||||
|
boolean addedHardClips = false;
|
||||||
|
|
||||||
while(!cigarStack.empty()) {
|
while(!cigarStack.empty()) {
|
||||||
CigarElement cigarElement = cigarStack.pop();
|
CigarElement cigarElement = cigarStack.pop();
|
||||||
|
|
@ -408,14 +410,33 @@ public class ClippingOp {
|
||||||
cigarElement.getOperator() != CigarOperator.DELETION &&
|
cigarElement.getOperator() != CigarOperator.DELETION &&
|
||||||
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
readHasStarted = true;
|
readHasStarted = true;
|
||||||
|
|
||||||
|
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
|
||||||
|
totalHardClip += cigarElement.getLength();
|
||||||
|
|
||||||
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
|
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||||
shift += cigarElement.getLength();
|
shift += cigarElement.getLength();
|
||||||
|
|
||||||
if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
|
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
|
||||||
if (i==1)
|
totalHardClip += cigarElement.getLength();
|
||||||
|
|
||||||
|
if (readHasStarted) {
|
||||||
|
if (i==1) {
|
||||||
|
if (!addedHardClips) {
|
||||||
|
if (totalHardClip > 0)
|
||||||
|
inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
||||||
|
addedHardClips = true;
|
||||||
|
}
|
||||||
inverseCigarStack.push(cigarElement);
|
inverseCigarStack.push(cigarElement);
|
||||||
else
|
}
|
||||||
|
else {
|
||||||
|
if (!addedHardClips) {
|
||||||
|
if (totalHardClip > 0)
|
||||||
|
cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
|
||||||
|
addedHardClips = true;
|
||||||
|
}
|
||||||
cleanCigar.add(cigarElement);
|
cleanCigar.add(cigarElement);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// first pass (i=1) is from end to start of the cigar elements
|
// first pass (i=1) is from end to start of the cigar elements
|
||||||
|
|
@ -432,39 +453,35 @@ public class ClippingOp {
|
||||||
}
|
}
|
||||||
|
|
||||||
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
|
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
|
||||||
int shift = 0;
|
int newShift = 0;
|
||||||
|
int oldShift = 0;
|
||||||
|
|
||||||
// Rewind to previous start (by counting everything that was already clipped in this read)
|
|
||||||
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
|
||||||
if (!cigarElement.getOperator().consumesReferenceBases())
|
|
||||||
shift -= cigarElement.getLength();
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Advance to new start (by counting everything new that has been clipped )
|
|
||||||
for (CigarElement cigarElement : newCigar.getCigarElements()) {
|
for (CigarElement cigarElement : newCigar.getCigarElements()) {
|
||||||
if (!cigarElement.getOperator().consumesReferenceBases())
|
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
||||||
shift += cigarElement.getLength();
|
newShift += cigarElement.getLength();
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return shift;
|
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
||||||
|
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
|
||||||
|
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return newShift - oldShift;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
|
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
|
||||||
if (cigarElement.getOperator() == CigarOperator.INSERTION) {
|
// Insertions should be discounted from the total hard clip count
|
||||||
int cigarElementLength = cigarElement.getLength();
|
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||||
if (clippedLength >= cigarElementLength)
|
return -clippedLength;
|
||||||
return -cigarElement.getLength();
|
|
||||||
else
|
|
||||||
return -clippedLength;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cigarElement.getOperator() == CigarOperator.DELETION)
|
// Deletions should be added to the total hard clip count
|
||||||
|
else if (cigarElement.getOperator() == CigarOperator.DELETION)
|
||||||
return cigarElement.getLength();
|
return cigarElement.getLength();
|
||||||
|
|
||||||
|
// There is no shift if we are not clipping an indel
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,8 @@ public enum ClippingRepresentation {
|
||||||
SOFTCLIP_BASES,
|
SOFTCLIP_BASES,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* WARNING: THIS OPTION IS STILL UNDER DEVELOPMENT AND IS NOT SUPPORTED.
|
||||||
|
*
|
||||||
* Change the read's cigar string to hard clip (H, see sam-spec) away the bases.
|
* Change the read's cigar string to hard clip (H, see sam-spec) away the bases.
|
||||||
* Hard clipping, unlike soft clipping, actually removes bases from the read,
|
* Hard clipping, unlike soft clipping, actually removes bases from the read,
|
||||||
* reducing the resulting file's size but introducing an irrevesible (i.e.,
|
* reducing the resulting file's size but introducing an irrevesible (i.e.,
|
||||||
|
|
|
||||||
|
|
@ -69,21 +69,21 @@ public class ReadClipper {
|
||||||
|
|
||||||
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
||||||
int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
||||||
|
|
||||||
if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read))
|
if (start < 0 || stop > read.getReadLength() - 1)
|
||||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||||
|
|
||||||
// TODO add check in the Hardclip function
|
if ( start > stop ) {
|
||||||
if ( start > stop )
|
// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
||||||
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
||||||
|
}
|
||||||
|
|
||||||
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
|
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
|
||||||
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
|
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
|
||||||
stop -= numDeletions(read);
|
// stop -= numDeletions(read);
|
||||||
if ( start > stop )
|
// if ( start > stop )
|
||||||
start -= numDeletions(read);
|
// start -= numDeletions(read);
|
||||||
|
|
||||||
|
|
||||||
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||||
|
|
@ -128,6 +128,39 @@ public class ReadClipper {
|
||||||
return this.clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
return this.clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SAMRecord hardClipSoftClippedBases () {
|
||||||
|
int readIndex = 0;
|
||||||
|
int cutLeft = -1; // first position to hard clip (inclusive)
|
||||||
|
int cutRight = -1; // first position to hard clip (inclusive)
|
||||||
|
boolean rightTail = false; // trigger to stop clipping the left tail and start cutting the right tail
|
||||||
|
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
|
||||||
|
if (rightTail) {
|
||||||
|
cutRight = readIndex;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
cutLeft = readIndex + cigarElement.getLength() - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||||
|
rightTail = true;
|
||||||
|
|
||||||
|
if (cigarElement.getOperator().consumesReadBases())
|
||||||
|
readIndex += cigarElement.getLength();
|
||||||
|
}
|
||||||
|
|
||||||
|
// It is extremely important that we cut the end first otherwise the read coordinates change.
|
||||||
|
if (cutRight >= 0)
|
||||||
|
this.addOp(new ClippingOp(cutRight, read.getReadLength() - 1));
|
||||||
|
if (cutLeft >= 0)
|
||||||
|
this.addOp(new ClippingOp(0, cutLeft));
|
||||||
|
|
||||||
|
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
|
* Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
|
||||||
*
|
*
|
||||||
|
|
@ -150,4 +183,21 @@ public class ReadClipper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SAMRecord hardClipLeadingInsertions() {
|
||||||
|
for(CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP &&
|
||||||
|
cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION)
|
||||||
|
break;
|
||||||
|
|
||||||
|
else if (cigarElement.getOperator() == CigarOperator.INSERTION) {
|
||||||
|
this.addOp(new ClippingOp(0, cigarElement.getLength() - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (cigarElement.getOperator() == CigarOperator.DELETION) {
|
||||||
|
throw new ReviewedStingException("No read should start with a deletion. Aligner bug?");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -115,15 +115,21 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
}
|
}
|
||||||
arrayIndex++;
|
arrayIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean sawFormatTag = false;
|
||||||
if ( arrayIndex < strings.length ) {
|
if ( arrayIndex < strings.length ) {
|
||||||
if ( !strings[arrayIndex].equals("FORMAT") )
|
if ( !strings[arrayIndex].equals("FORMAT") )
|
||||||
throw new TribbleException.InvalidHeader("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
|
throw new TribbleException.InvalidHeader("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
|
||||||
|
sawFormatTag = true;
|
||||||
arrayIndex++;
|
arrayIndex++;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (arrayIndex < strings.length)
|
while ( arrayIndex < strings.length )
|
||||||
auxTags.add(strings[arrayIndex++]);
|
auxTags.add(strings[arrayIndex++]);
|
||||||
|
|
||||||
|
if ( sawFormatTag && auxTags.size() == 0 )
|
||||||
|
throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data");
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
if ( str.startsWith("##INFO=") ) {
|
if ( str.startsWith("##INFO=") ) {
|
||||||
VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version);
|
VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version);
|
||||||
|
|
@ -200,28 +206,24 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
* @return a VariantContext
|
* @return a VariantContext
|
||||||
*/
|
*/
|
||||||
public Feature decode(String line) {
|
public Feature decode(String line) {
|
||||||
return reallyDecode(line);
|
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||||
}
|
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
||||||
|
|
||||||
private Feature reallyDecode(String line) {
|
// our header cannot be null, we need the genotype sample names and counts
|
||||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
|
||||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
|
||||||
|
|
||||||
// our header cannot be null, we need the genotype sample names and counts
|
if (parts == null)
|
||||||
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
|
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
|
||||||
|
|
||||||
if (parts == null)
|
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
||||||
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
|
|
||||||
|
|
||||||
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
|
||||||
|
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
||||||
|
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||||
|
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||||
|
" tokens, and saw " + nParts + " )", lineNo);
|
||||||
|
|
||||||
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
|
return parseVCFLine(parts);
|
||||||
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
|
||||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
|
||||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
|
||||||
" tokens, and saw " + nParts + " )", lineNo);
|
|
||||||
|
|
||||||
return parseVCFLine(parts);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void generateException(String message) {
|
protected void generateException(String message) {
|
||||||
|
|
|
||||||
|
|
@ -35,9 +35,6 @@ public class VCFHeader {
|
||||||
// the header string indicator
|
// the header string indicator
|
||||||
public static final String HEADER_INDICATOR = "#";
|
public static final String HEADER_INDICATOR = "#";
|
||||||
|
|
||||||
/** do we have genotying data? */
|
|
||||||
private boolean hasGenotypingData = false;
|
|
||||||
|
|
||||||
// were the input samples sorted originally (or are we sorting them)?
|
// were the input samples sorted originally (or are we sorting them)?
|
||||||
private boolean samplesWereAlreadySorted = true;
|
private boolean samplesWereAlreadySorted = true;
|
||||||
|
|
||||||
|
|
@ -57,17 +54,15 @@ public class VCFHeader {
|
||||||
* create a VCF header, given a list of meta data and auxillary tags
|
* create a VCF header, given a list of meta data and auxillary tags
|
||||||
*
|
*
|
||||||
* @param metaData the meta data associated with this header
|
* @param metaData the meta data associated with this header
|
||||||
* @param genotypeSampleNames the genotype format field, and the sample names
|
* @param genotypeSampleNames the sample names
|
||||||
*/
|
*/
|
||||||
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
|
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
|
||||||
mMetaData = new TreeSet<VCFHeaderLine>();
|
mMetaData = new TreeSet<VCFHeaderLine>();
|
||||||
if ( metaData != null )
|
if ( metaData != null )
|
||||||
mMetaData.addAll(metaData);
|
mMetaData.addAll(metaData);
|
||||||
for (String col : genotypeSampleNames) {
|
|
||||||
if (!col.equals("FORMAT"))
|
mGenotypeSampleNames.addAll(genotypeSampleNames);
|
||||||
mGenotypeSampleNames.add(col);
|
|
||||||
}
|
|
||||||
if (genotypeSampleNames.size() > 0) hasGenotypingData = true;
|
|
||||||
loadVCFVersion();
|
loadVCFVersion();
|
||||||
loadMetaDataMaps();
|
loadMetaDataMaps();
|
||||||
|
|
||||||
|
|
@ -157,7 +152,7 @@ public class VCFHeader {
|
||||||
* @return true if we have genotyping columns, false otherwise
|
* @return true if we have genotyping columns, false otherwise
|
||||||
*/
|
*/
|
||||||
public boolean hasGenotypingData() {
|
public boolean hasGenotypingData() {
|
||||||
return hasGenotypingData;
|
return mGenotypeSampleNames.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -171,7 +166,7 @@ public class VCFHeader {
|
||||||
|
|
||||||
/** @return the column count */
|
/** @return the column count */
|
||||||
public int getColumnCount() {
|
public int getColumnCount() {
|
||||||
return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0);
|
return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -174,6 +174,12 @@ public class UserException extends ReviewedStingException {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class MalformedVCFHeader extends UserException {
|
||||||
|
public MalformedVCFHeader(String message) {
|
||||||
|
super(String.format("The provided VCF file has a malformed header: %s", message));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static class ReadMissingReadGroup extends MalformedBAM {
|
public static class ReadMissingReadGroup extends MalformedBAM {
|
||||||
public ReadMissingReadGroup(SAMRecord read) {
|
public ReadMissingReadGroup(SAMRecord read) {
|
||||||
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
|
||||||
public boolean includeInDocs(ClassDoc doc) {
|
public boolean includeInDocs(ClassDoc doc) {
|
||||||
try {
|
try {
|
||||||
Class type = HelpUtils.getClassForDoc(doc);
|
Class type = HelpUtils.getClassForDoc(doc);
|
||||||
return JVMUtils.isConcrete(type);
|
boolean hidden = ! getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class);
|
||||||
|
return ! hidden && JVMUtils.isConcrete(type);
|
||||||
} catch ( ClassNotFoundException e ) {
|
} catch ( ClassNotFoundException e ) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -157,7 +157,7 @@ public class ReadUtils {
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <--------> (read)
|
* <--------> (read)
|
||||||
*/
|
*/
|
||||||
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
|
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* God, there's a huge information asymmetry in SAM format:
|
* God, there's a huge information asymmetry in SAM format:
|
||||||
|
|
@ -640,27 +640,35 @@ public class ReadUtils {
|
||||||
*/
|
*/
|
||||||
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
|
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
|
||||||
|
|
||||||
int start = getRefCoordSoftUnclippedStart(read);
|
int sStart = getRefCoordSoftUnclippedStart(read);
|
||||||
int stop = getRefCoordSoftUnclippedEnd(read);
|
int sStop = getRefCoordSoftUnclippedEnd(read);
|
||||||
|
int uStart = read.getUnclippedStart();
|
||||||
|
int uStop = read.getUnclippedEnd();
|
||||||
|
|
||||||
if ( !read.getReferenceName().equals(interval.getContig()) )
|
if ( !read.getReferenceName().equals(interval.getContig()) )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
|
||||||
|
|
||||||
else if ( stop < interval.getStart() )
|
else if ( uStop < interval.getStart() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
|
||||||
|
|
||||||
else if ( start > interval.getStop() )
|
else if ( uStart > interval.getStop() )
|
||||||
return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
|
return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
|
||||||
|
|
||||||
else if ( (start >= interval.getStart()) &&
|
else if ( sStop < interval.getStart() )
|
||||||
(stop <= interval.getStop()) )
|
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
|
||||||
|
|
||||||
|
else if ( sStart > interval.getStop() )
|
||||||
|
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
|
||||||
|
|
||||||
|
else if ( (sStart >= interval.getStart()) &&
|
||||||
|
(sStop <= interval.getStop()) )
|
||||||
return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
|
return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
|
||||||
|
|
||||||
else if ( (start < interval.getStart()) &&
|
else if ( (sStart < interval.getStart()) &&
|
||||||
(stop > interval.getStop()) )
|
(sStop > interval.getStop()) )
|
||||||
return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
|
return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
|
||||||
|
|
||||||
else if ( (start < interval.getStart()) )
|
else if ( (sStart < interval.getStart()) )
|
||||||
return ReadAndIntervalOverlap.OVERLAP_LEFT;
|
return ReadAndIntervalOverlap.OVERLAP_LEFT;
|
||||||
|
|
||||||
else
|
else
|
||||||
|
|
@ -787,5 +795,47 @@ public class ReadUtils {
|
||||||
return readBases;
|
return readBases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
|
||||||
|
int newReadStart = rec.getAlignmentStart();
|
||||||
|
int newReadEnd = rec.getAlignmentEnd();
|
||||||
|
List<CigarElement> newCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
|
||||||
|
int heldOver = -1;
|
||||||
|
boolean sSeen = false;
|
||||||
|
for ( CigarElement e : rec.getCigar().getCigarElements() ) {
|
||||||
|
if ( e.getOperator().equals(CigarOperator.S) ) {
|
||||||
|
newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M));
|
||||||
|
if ( sSeen ) {
|
||||||
|
newReadEnd += e.getLength();
|
||||||
|
sSeen = true;
|
||||||
|
} else {
|
||||||
|
newReadStart -= e.getLength();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
newCigarElements.add(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// merge duplicate operators together
|
||||||
|
int idx = 0;
|
||||||
|
List<CigarElement> finalCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
|
||||||
|
while ( idx < newCigarElements.size() -1 ) {
|
||||||
|
if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) {
|
||||||
|
int combSize = newCigarElements.get(idx).getLength();
|
||||||
|
int offset = 0;
|
||||||
|
while ( idx + offset < newCigarElements.size()-1 && newCigarElements.get(idx+offset).getOperator().equals(newCigarElements.get(idx+1+offset).getOperator()) ) {
|
||||||
|
combSize += newCigarElements.get(idx+offset+1).getLength();
|
||||||
|
offset++;
|
||||||
|
}
|
||||||
|
finalCigarElements.add(new CigarElement(combSize,newCigarElements.get(idx).getOperator()));
|
||||||
|
idx = idx + offset -1;
|
||||||
|
} else {
|
||||||
|
finalCigarElements.add(newCigarElements.get(idx));
|
||||||
|
}
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
|
||||||
|
rec.setCigar(new Cigar(finalCigarElements));
|
||||||
|
rec.setAlignmentStart(newReadStart);
|
||||||
|
|
||||||
|
return rec;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,14 +26,31 @@ public class Genotype {
|
||||||
protected boolean filtersWereAppliedToContext;
|
protected boolean filtersWereAppliedToContext;
|
||||||
|
|
||||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
||||||
|
this(sampleName, alleles, negLog10PError, filters, attributes, isPhased, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased, double[] log10Likelihoods) {
|
||||||
if ( alleles != null )
|
if ( alleles != null )
|
||||||
this.alleles = Collections.unmodifiableList(alleles);
|
this.alleles = Collections.unmodifiableList(alleles);
|
||||||
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
||||||
|
if ( log10Likelihoods != null )
|
||||||
|
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||||
filtersWereAppliedToContext = filters != null;
|
filtersWereAppliedToContext = filters != null;
|
||||||
this.isPhased = isPhased;
|
this.isPhased = isPhased;
|
||||||
validate();
|
validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new Genotype for sampleName with genotype according to alleles.
|
||||||
|
* @param sampleName
|
||||||
|
* @param alleles
|
||||||
|
* @param negLog10PError the confidence in these alleles
|
||||||
|
* @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
|
||||||
|
*/
|
||||||
|
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, double[] log10Likelihoods) {
|
||||||
|
this(sampleName, alleles, negLog10PError, null, null, false, log10Likelihoods);
|
||||||
|
}
|
||||||
|
|
||||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError) {
|
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError) {
|
||||||
this(sampleName, alleles, negLog10PError, null, null, false);
|
this(sampleName, alleles, negLog10PError, null, null, false);
|
||||||
}
|
}
|
||||||
|
|
@ -57,13 +74,6 @@ public class Genotype {
|
||||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
|
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Genotype removePLs(Genotype g) {
|
|
||||||
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
|
||||||
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
|
||||||
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
|
|
||||||
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
|
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
|
||||||
return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
|
||||||
}
|
}
|
||||||
|
|
@ -258,7 +268,8 @@ public class Genotype {
|
||||||
* @param <V> the value type
|
* @param <V> the value type
|
||||||
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
|
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
|
||||||
*/
|
*/
|
||||||
public static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
|
private static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
|
||||||
|
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
|
||||||
List<T> t = new ArrayList<T>(c.keySet());
|
List<T> t = new ArrayList<T>(c.keySet());
|
||||||
Collections.sort(t);
|
Collections.sort(t);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1495,7 +1495,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
|
|
||||||
// Do not change the filter state if filters were not applied to this context
|
// Do not change the filter state if filters were not applied to this context
|
||||||
Set<String> inputVCFilters = inputVC.filtersWereAppliedToContext ? inputVC.getFilters() : null;
|
Set<String> inputVCFilters = inputVC.filtersWereAppliedToContext ? inputVC.getFilters() : null;
|
||||||
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes());
|
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes(),refByte);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
return inputVC;
|
return inputVC;
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,11 @@ import java.io.Serializable;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class VariantContextUtils {
|
public class VariantContextUtils {
|
||||||
|
public final static String MERGE_INTERSECTION = "Intersection";
|
||||||
|
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||||
|
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
||||||
|
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||||
|
|
||||||
final public static JexlEngine engine = new JexlEngine();
|
final public static JexlEngine engine = new JexlEngine();
|
||||||
static {
|
static {
|
||||||
engine.setSilent(false); // will throw errors now for selects that don't evaluate properly
|
engine.setSilent(false); // will throw errors now for selects that don't evaluate properly
|
||||||
|
|
@ -154,6 +159,13 @@ public class VariantContextUtils {
|
||||||
return "%." + precision + "f";
|
return "%." + precision + "f";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Genotype removePLs(Genotype g) {
|
||||||
|
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
|
||||||
|
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
|
||||||
|
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
|
||||||
|
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A simple but common wrapper for matching VariantContext objects using JEXL expressions
|
* A simple but common wrapper for matching VariantContext objects using JEXL expressions
|
||||||
*/
|
*/
|
||||||
|
|
@ -594,7 +606,9 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
// if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate)
|
// if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||||
for ( VariantContext vc : VCs ) {
|
for ( VariantContext vc : VCs ) {
|
||||||
if ( vc.alleles.size() != alleles.size() ) {
|
if (vc.alleles.size() == 1)
|
||||||
|
continue;
|
||||||
|
if ( vc.alleles.size() != alleles.size()) {
|
||||||
genotypes = stripPLs(genotypes);
|
genotypes = stripPLs(genotypes);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -609,19 +623,20 @@ public class VariantContextUtils {
|
||||||
if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
|
if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
|
||||||
filters.clear();
|
filters.clear();
|
||||||
|
|
||||||
|
|
||||||
if ( annotateOrigin ) { // we care about where the call came from
|
if ( annotateOrigin ) { // we care about where the call came from
|
||||||
String setValue;
|
String setValue;
|
||||||
if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
|
if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
|
||||||
setValue = "Intersection";
|
setValue = MERGE_INTERSECTION;
|
||||||
else if ( nFiltered == VCs.size() ) // everything was filtered out
|
else if ( nFiltered == VCs.size() ) // everything was filtered out
|
||||||
setValue = "FilteredInAll";
|
setValue = MERGE_FILTER_IN_ALL;
|
||||||
else if ( variantSources.isEmpty() ) // everyone was reference
|
else if ( variantSources.isEmpty() ) // everyone was reference
|
||||||
setValue = "ReferenceInAll";
|
setValue = MERGE_REF_IN_ALL;
|
||||||
else {
|
else {
|
||||||
LinkedHashSet<String> s = new LinkedHashSet<String>();
|
LinkedHashSet<String> s = new LinkedHashSet<String>();
|
||||||
for ( VariantContext vc : VCs )
|
for ( VariantContext vc : VCs )
|
||||||
if ( vc.isVariant() )
|
if ( vc.isVariant() )
|
||||||
s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() );
|
s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
|
||||||
setValue = Utils.join("-", s);
|
setValue = Utils.join("-", s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -734,7 +749,7 @@ public class VariantContextUtils {
|
||||||
Map<String, Genotype> newGs = new HashMap<String, Genotype>(genotypes.size());
|
Map<String, Genotype> newGs = new HashMap<String, Genotype>(genotypes.size());
|
||||||
|
|
||||||
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) {
|
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) {
|
||||||
newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue());
|
newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
return newGs;
|
return newGs;
|
||||||
|
|
@ -743,9 +758,46 @@ public class VariantContextUtils {
|
||||||
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||||
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||||
for ( VariantContext vc : VCs ) {
|
for ( VariantContext vc : VCs ) {
|
||||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
|
||||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
// look at previous variant contexts of different type. If:
|
||||||
mappedVCs.get(vc.getType()).add(vc);
|
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
|
||||||
|
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
|
||||||
|
// c) neither: do nothing, just add vc to its own list
|
||||||
|
boolean addtoOwnList = true;
|
||||||
|
for (VariantContext.Type type : VariantContext.Type.values()) {
|
||||||
|
if (type.equals(vc.getType()))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!mappedVCs.containsKey(type))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
List<VariantContext> vcList = mappedVCs.get(type);
|
||||||
|
for (int k=0; k < vcList.size(); k++) {
|
||||||
|
VariantContext otherVC = vcList.get(k);
|
||||||
|
if (allelesAreSubset(otherVC,vc)) {
|
||||||
|
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
|
||||||
|
vcList.remove(k);
|
||||||
|
// avoid having empty lists
|
||||||
|
if (vcList.size() == 0)
|
||||||
|
mappedVCs.remove(vcList);
|
||||||
|
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||||
|
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||||
|
mappedVCs.get(vc.getType()).add(otherVC);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
else if (allelesAreSubset(vc,otherVC)) {
|
||||||
|
// vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
|
||||||
|
mappedVCs.get(type).add(vc);
|
||||||
|
addtoOwnList = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (addtoOwnList) {
|
||||||
|
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||||
|
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||||
|
mappedVCs.get(vc.getType()).add(vc);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return mappedVCs;
|
return mappedVCs;
|
||||||
|
|
|
||||||
|
|
@ -132,15 +132,21 @@ public abstract class BaseTest {
|
||||||
*/
|
*/
|
||||||
public static class TestDataProvider {
|
public static class TestDataProvider {
|
||||||
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
|
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
|
||||||
|
private final String name;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new TestDataProvider instance bound to the class variable C
|
* Create a new TestDataProvider instance bound to the class variable C
|
||||||
* @param c
|
* @param c
|
||||||
*/
|
*/
|
||||||
public TestDataProvider(Class c) {
|
public TestDataProvider(Class c, String name) {
|
||||||
if ( ! tests.containsKey(c) )
|
if ( ! tests.containsKey(c) )
|
||||||
tests.put(c, new ArrayList<Object>());
|
tests.put(c, new ArrayList<Object>());
|
||||||
tests.get(c).add(this);
|
tests.get(c).add(this);
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public TestDataProvider(Class c) {
|
||||||
|
this(c, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -153,6 +159,11 @@ public abstract class BaseTest {
|
||||||
for ( Object x : tests.get(c) ) params2.add(new Object[]{x});
|
for ( Object x : tests.get(c) ) params2.add(new Object[]{x});
|
||||||
return params2.toArray(new Object[][]{});
|
return params2.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "TestDataProvider("+name+")";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
|
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("6645babc8c7d46be0da223477c7b1291"));
|
Arrays.asList("3008d6f5044bc14801e5c58d985dec72"));
|
||||||
executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec);
|
executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -132,9 +132,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
|
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
|
||||||
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
|
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
|
||||||
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
|
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e")
|
Arrays.asList("122321a85e448f21679f6ca15c5e22ad")
|
||||||
);
|
);
|
||||||
executeTest("Testing SnpEff annotations", spec);
|
executeTest("Testing SnpEff annotations", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||||
Arrays.asList("ec43daadfb15b00b41aeb0017a45df0b"));
|
Arrays.asList("6458f3b8fe4954e2ffc2af972aaab19e"));
|
||||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
"-o %s"
|
"-o %s"
|
||||||
),
|
),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12")
|
Arrays.asList("d9dcb352c53106f54fcc981f15d35a90")
|
||||||
);
|
);
|
||||||
executeTest("testFunctionClassWithSnpeff", spec);
|
executeTest("testFunctionClassWithSnpeff", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -89,8 +89,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
|
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
|
||||||
|
|
||||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format
|
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format
|
||||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format
|
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "96941ee177b0614a9879af0ac3218963"); } // official project VCF files in tabix format
|
||||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "312a22aedb088b678bc891f1a1b03c91"); }
|
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1c8720fde62687c2e861217670d8b3c"); }
|
||||||
|
|
||||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
|
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
|
||||||
|
|
||||||
|
|
@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("35acb0f15f9cd18c653ede4e15e365c9"));
|
Arrays.asList("b14f8cbb5d03a2e613b12da4da9efd9a"));
|
||||||
executeTest("threeWayWithRefs", spec);
|
executeTest("threeWayWithRefs", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
||||||
1,
|
1,
|
||||||
Arrays.asList(""));
|
Arrays.asList("5969446769cb8377daa2db29304ae6b5"));
|
||||||
executeTest("combineDBSNPDuplicateSites:", spec);
|
executeTest("combineDBSNPDuplicateSites:", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -16,7 +16,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant:VCF3 " + testfile),
|
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
|
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
|
||||||
);
|
);
|
||||||
|
|
@ -30,7 +30,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s -NO_HEADER -xl_sn A -xl_sf " + samplesFile + " --variant:VCF3 " + testfile,
|
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s -NO_HEADER -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
|
||||||
1,
|
1,
|
||||||
Arrays.asList("730f021fd6ecf1d195dabbee2e233bfd")
|
Arrays.asList("730f021fd6ecf1d195dabbee2e233bfd")
|
||||||
);
|
);
|
||||||
|
|
@ -43,7 +43,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
String testfile = validationDataLocation + "test.dup.vcf";
|
String testfile = validationDataLocation + "test.dup.vcf";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(" -sn A -sn B -sn C --variant:VCF3 " + testfile),
|
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("b74038779fe6485dbb8734ae48178356")
|
Arrays.asList("b74038779fe6485dbb8734ae48178356")
|
||||||
);
|
);
|
||||||
|
|
@ -56,7 +56,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
|
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER",
|
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e")
|
Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e")
|
||||||
);
|
);
|
||||||
|
|
@ -69,7 +69,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
|
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER",
|
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a")
|
Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a")
|
||||||
);
|
);
|
||||||
|
|
@ -90,16 +90,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
||||||
executeTest("testVariantTypeSelection--" + testFile, spec);
|
executeTest("testVariantTypeSelection--" + testFile, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled=false)
|
@Test
|
||||||
public void testRemovePLs() {
|
public void testUsingDbsnpName() {
|
||||||
String testFile = validationDataLocation + "combine.3.vcf";
|
String testFile = validationDataLocation + "combine.3.vcf";
|
||||||
|
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s -NO_HEADER",
|
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s -NO_HEADER",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("")
|
Arrays.asList("167a1265df820978a74c267df44d5c43")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("testWithPLs--" + testFile, spec);
|
executeTest("testUsingDbsnpName--" + testFile, spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import java.io.File;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class JnaSessionIntegrationTest extends BaseTest {
|
public class JnaSessionIntegrationTest extends BaseTest {
|
||||||
|
private String implementation = null;
|
||||||
private static final SessionFactory factory = new JnaSessionFactory();
|
private static final SessionFactory factory = new JnaSessionFactory();
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -44,10 +45,23 @@ public class JnaSessionIntegrationTest extends BaseTest {
|
||||||
System.out.println(String.format("DRMAA contact(s): %s", session.getContact()));
|
System.out.println(String.format("DRMAA contact(s): %s", session.getContact()));
|
||||||
System.out.println(String.format("DRM system(s): %s", session.getDrmSystem()));
|
System.out.println(String.format("DRM system(s): %s", session.getDrmSystem()));
|
||||||
System.out.println(String.format("DRMAA implementation(s): %s", session.getDrmaaImplementation()));
|
System.out.println(String.format("DRMAA implementation(s): %s", session.getDrmaaImplementation()));
|
||||||
|
this.implementation = session.getDrmaaImplementation();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test(dependsOnMethods = { "testDrmaa" })
|
||||||
public void testSubmitEcho() throws Exception {
|
public void testSubmitEcho() throws Exception {
|
||||||
|
if (implementation.contains("LSF")) {
|
||||||
|
System.err.println(" ***********************************************************");
|
||||||
|
System.err.println(" *************************************************************");
|
||||||
|
System.err.println(" **** ****");
|
||||||
|
System.err.println(" **** Skipping JnaSessionIntegrationTest.testSubmitEcho() ****");
|
||||||
|
System.err.println(" **** Are you using the dotkit .combined_LSF_SGE? ****");
|
||||||
|
System.err.println(" **** ****");
|
||||||
|
System.err.println(" *************************************************************");
|
||||||
|
System.err.println(" ***********************************************************");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
File outFile = createNetworkTempFile("JnaSessionIntegrationTest-", ".out");
|
File outFile = createNetworkTempFile("JnaSessionIntegrationTest-", ".out");
|
||||||
Session session = factory.getSession();
|
Session session = factory.getSession();
|
||||||
session.init(null);
|
session.init(null);
|
||||||
|
|
|
||||||
|
|
@ -105,7 +105,6 @@ public class VCFWriterUnitTest extends BaseTest {
|
||||||
public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
|
public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
|
||||||
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
|
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
|
||||||
metaData.add(new VCFHeaderLine("two", "2"));
|
metaData.add(new VCFHeaderLine("two", "2"));
|
||||||
additionalColumns.add("FORMAT");
|
|
||||||
additionalColumns.add("extra1");
|
additionalColumns.add("extra1");
|
||||||
additionalColumns.add("extra2");
|
additionalColumns.add("extra2");
|
||||||
return new VCFHeader(metaData, additionalColumns);
|
return new VCFHeader(metaData, additionalColumns);
|
||||||
|
|
@ -159,6 +158,6 @@ public class VCFWriterUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(additionalColumns.contains(key));
|
Assert.assertTrue(additionalColumns.contains(key));
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
Assert.assertEquals(index+1, additionalColumns.size() /* for the header field we don't see */);
|
Assert.assertEquals(index, additionalColumns.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,84 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeSuite;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
public class GenotypeUnitTest extends BaseTest {
|
||||||
|
Allele A, Aref, T;
|
||||||
|
|
||||||
|
@BeforeSuite
|
||||||
|
public void before() {
|
||||||
|
A = Allele.create("A");
|
||||||
|
Aref = Allele.create("A", true);
|
||||||
|
T = Allele.create("T");
|
||||||
|
}
|
||||||
|
|
||||||
|
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
||||||
|
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased, double[] log10Likelihoods) {
|
||||||
|
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, double[] log10Likelihoods)
|
||||||
|
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError)
|
||||||
|
// public Genotype(String sampleName, List<Allele> alleles)
|
||||||
|
// public List<Allele> getAlleles()
|
||||||
|
// public List<Allele> getAlleles(Allele allele)
|
||||||
|
// public Allele getAllele(int i)
|
||||||
|
// public boolean isPhased()
|
||||||
|
// public int getPloidy()
|
||||||
|
// public Type getType()
|
||||||
|
// public boolean isHom()
|
||||||
|
// public boolean isHomRef()
|
||||||
|
// public boolean isHomVar()
|
||||||
|
// public boolean isHet()
|
||||||
|
// public boolean isNoCall()
|
||||||
|
// public boolean isCalled()
|
||||||
|
// public boolean isAvailable()
|
||||||
|
// public boolean hasLikelihoods()
|
||||||
|
// public GenotypeLikelihoods getLikelihoods()
|
||||||
|
// public boolean sameGenotype(Genotype other)
|
||||||
|
// public boolean sameGenotype(Genotype other, boolean ignorePhase)
|
||||||
|
// public String getSampleName()
|
||||||
|
// public boolean hasNegLog10PError()
|
||||||
|
// public double getNegLog10PError()
|
||||||
|
// public double getPhredScaledQual()
|
||||||
|
// public boolean hasAttribute(String key)
|
||||||
|
// public Object getAttribute(String key)
|
||||||
|
// public Object getAttribute(String key, Object defaultValue)
|
||||||
|
// public String getAttributeAsString(String key, String defaultValue)
|
||||||
|
// public int getAttributeAsInt(String key, int defaultValue)
|
||||||
|
// public double getAttributeAsDouble(String key, double defaultValue)
|
||||||
|
// public boolean getAttributeAsBoolean(String key, boolean defaultValue)
|
||||||
|
}
|
||||||
|
|
@ -5,6 +5,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
||||||
// the imports for unit testing.
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeSuite;
|
import org.testng.annotations.BeforeSuite;
|
||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
|
|
@ -14,10 +15,7 @@ import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
/**
|
public class VariantContextUnitTest extends BaseTest {
|
||||||
* Basic unit test for RecalData
|
|
||||||
*/
|
|
||||||
public class VariantContextUnitTest {
|
|
||||||
Allele A, Aref, T, Tref;
|
Allele A, Aref, T, Tref;
|
||||||
Allele del, delRef, ATC, ATCref;
|
Allele del, delRef, ATC, ATCref;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,562 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeSuite;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class VariantContextUtilsUnitTest extends BaseTest {
|
||||||
|
Allele Aref, T, C, delRef, Cref, ATC, ATCATC;
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
@BeforeSuite
|
||||||
|
public void setup() {
|
||||||
|
final File referenceFile = new File(b37KGReference);
|
||||||
|
try {
|
||||||
|
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||||
|
genomeLocParser = new GenomeLocParser(seq);
|
||||||
|
}
|
||||||
|
catch(FileNotFoundException ex) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(referenceFile,ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
// alleles
|
||||||
|
Aref = Allele.create("A", true);
|
||||||
|
Cref = Allele.create("C", true);
|
||||||
|
delRef = Allele.create("-", true);
|
||||||
|
T = Allele.create("T");
|
||||||
|
C = Allele.create("C");
|
||||||
|
ATC = Allele.create("ATC");
|
||||||
|
ATCATC = Allele.create("ATCATC");
|
||||||
|
}
|
||||||
|
|
||||||
|
private Genotype makeG(String sample, Allele a1, Allele a2) {
|
||||||
|
return new Genotype(sample, Arrays.asList(a1, a2));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double l1, double l2, double l3) {
|
||||||
|
return new Genotype(sample, Arrays.asList(a1, a2), log10pError, new double[]{l1,l2,l3});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) {
|
||||||
|
return new Genotype(sample, Arrays.asList(a1, a2), log10pError);
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext makeVC(String source, List<Allele> alleles) {
|
||||||
|
return makeVC(source, alleles, null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext makeVC(String source, List<Allele> alleles, Genotype... g1) {
|
||||||
|
return makeVC(source, alleles, Arrays.asList(g1));
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext makeVC(String source, List<Allele> alleles, String filter) {
|
||||||
|
return makeVC(source, alleles, filter.equals(".") ? null : new HashSet<String>(Arrays.asList(filter)));
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext makeVC(String source, List<Allele> alleles, Set<String> filters) {
|
||||||
|
return makeVC(source, alleles, null, filters);
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes) {
|
||||||
|
return makeVC(source, alleles, genotypes, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
|
||||||
|
int start = 10;
|
||||||
|
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
|
||||||
|
return new VariantContext(source, "1", start, stop, alleles,
|
||||||
|
genotypes == null ? null : VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes),
|
||||||
|
1.0, filters, null, Cref.getBases()[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Test allele merging
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class MergeAllelesTest extends TestDataProvider {
|
||||||
|
List<List<Allele>> inputs;
|
||||||
|
List<Allele> expected;
|
||||||
|
|
||||||
|
private MergeAllelesTest(List<Allele>... arg) {
|
||||||
|
super(MergeAllelesTest.class);
|
||||||
|
LinkedList<List<Allele>> all = new LinkedList<List<Allele>>(Arrays.asList(arg));
|
||||||
|
expected = all.pollLast();
|
||||||
|
inputs = all;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("MergeAllelesTest input=%s expected=%s", inputs, expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@DataProvider(name = "mergeAlleles")
|
||||||
|
public Object[][] mergeAllelesData() {
|
||||||
|
// first, do no harm
|
||||||
|
new MergeAllelesTest(Arrays.asList(Aref),
|
||||||
|
Arrays.asList(Aref));
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(Aref),
|
||||||
|
Arrays.asList(Aref),
|
||||||
|
Arrays.asList(Aref));
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(Aref),
|
||||||
|
Arrays.asList(Aref, T),
|
||||||
|
Arrays.asList(Aref, T));
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(Aref, C),
|
||||||
|
Arrays.asList(Aref, T),
|
||||||
|
Arrays.asList(Aref, C, T));
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(Aref, T),
|
||||||
|
Arrays.asList(Aref, C),
|
||||||
|
Arrays.asList(Aref, C, T)); // sorted by allele
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(Aref, C, T),
|
||||||
|
Arrays.asList(Aref, C),
|
||||||
|
Arrays.asList(Aref, C, T));
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(Aref, T, C),
|
||||||
|
Arrays.asList(Aref, C),
|
||||||
|
Arrays.asList(Aref, C, T)); // sorted by allele
|
||||||
|
|
||||||
|
// The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant.
|
||||||
|
// The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference.
|
||||||
|
new MergeAllelesTest(Arrays.asList(delRef),
|
||||||
|
Arrays.asList(Cref));
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(delRef),
|
||||||
|
Arrays.asList(delRef, ATC),
|
||||||
|
Arrays.asList(delRef, ATC));
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(delRef),
|
||||||
|
Arrays.asList(delRef, ATC, ATCATC),
|
||||||
|
Arrays.asList(delRef, ATC, ATCATC));
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
|
||||||
|
Arrays.asList(delRef, ATC, ATCATC),
|
||||||
|
Arrays.asList(delRef, ATC, ATCATC));
|
||||||
|
|
||||||
|
new MergeAllelesTest(Arrays.asList(delRef, ATC),
|
||||||
|
Arrays.asList(delRef, ATCATC),
|
||||||
|
Arrays.asList(delRef, ATC, ATCATC));
|
||||||
|
|
||||||
|
return MergeAllelesTest.getTests(MergeAllelesTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "mergeAlleles")
|
||||||
|
public void testMergeAlleles(MergeAllelesTest cfg) {
|
||||||
|
final List<VariantContext> inputs = new ArrayList<VariantContext>();
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
for ( final List<Allele> alleles : cfg.inputs ) {
|
||||||
|
final String name = "vcf" + ++i;
|
||||||
|
inputs.add(makeVC(name, alleles));
|
||||||
|
}
|
||||||
|
|
||||||
|
final List<String> priority = vcs2priority(inputs);
|
||||||
|
|
||||||
|
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
|
||||||
|
inputs, priority,
|
||||||
|
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
|
VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false);
|
||||||
|
|
||||||
|
Assert.assertEquals(merged.getAlleles(), cfg.expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Test rsID merging
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class SimpleMergeRSIDTest extends TestDataProvider {
|
||||||
|
List<String> inputs;
|
||||||
|
String expected;
|
||||||
|
|
||||||
|
private SimpleMergeRSIDTest(String... arg) {
|
||||||
|
super(SimpleMergeRSIDTest.class);
|
||||||
|
LinkedList<String> allStrings = new LinkedList<String>(Arrays.asList(arg));
|
||||||
|
expected = allStrings.pollLast();
|
||||||
|
inputs = allStrings;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("SimpleMergeRSIDTest vc=%s expected=%s", inputs, expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "simplemergersiddata")
|
||||||
|
public Object[][] createSimpleMergeRSIDData() {
|
||||||
|
new SimpleMergeRSIDTest(".", ".");
|
||||||
|
new SimpleMergeRSIDTest(".", ".", ".");
|
||||||
|
new SimpleMergeRSIDTest("rs1", "rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs1", "rs1", "rs1");
|
||||||
|
new SimpleMergeRSIDTest(".", "rs1", "rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs1", ".", "rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2");
|
||||||
|
new SimpleMergeRSIDTest("rs1", "rs2", "rs1", "rs1,rs2"); // duplicates
|
||||||
|
new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs1", ".", ".", "rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs1", "rs2", "rs3", "rs1,rs2,rs3");
|
||||||
|
|
||||||
|
return SimpleMergeRSIDTest.getTests(SimpleMergeRSIDTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "simplemergersiddata")
|
||||||
|
public void testRSIDMerge(SimpleMergeRSIDTest cfg) {
|
||||||
|
final VariantContext snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T));
|
||||||
|
final List<VariantContext> inputs = new ArrayList<VariantContext>();
|
||||||
|
|
||||||
|
for ( final String id : cfg.inputs ) {
|
||||||
|
MutableVariantContext vc = new MutableVariantContext(snpVC1);
|
||||||
|
if ( ! id.equals(".") ) vc.setID(id);
|
||||||
|
inputs.add(vc);
|
||||||
|
}
|
||||||
|
|
||||||
|
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
|
||||||
|
inputs, null,
|
||||||
|
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
|
VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false);
|
||||||
|
Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Test filtered merging
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class MergeFilteredTest extends TestDataProvider {
|
||||||
|
List<VariantContext> inputs;
|
||||||
|
VariantContext expected;
|
||||||
|
String setExpected;
|
||||||
|
VariantContextUtils.FilteredRecordMergeType type;
|
||||||
|
|
||||||
|
|
||||||
|
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) {
|
||||||
|
this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected);
|
||||||
|
}
|
||||||
|
|
||||||
|
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) {
|
||||||
|
super(MergeFilteredTest.class, name);
|
||||||
|
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(input1, input2));
|
||||||
|
this.expected = expected;
|
||||||
|
this.type = type;
|
||||||
|
inputs = all;
|
||||||
|
this.setExpected = setExpected;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("%s input=%s expected=%s", super.toString(), inputs, expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "mergeFiltered")
|
||||||
|
public Object[][] mergeFilteredData() {
|
||||||
|
new MergeFilteredTest("AllPass",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||||
|
VariantContextUtils.MERGE_INTERSECTION);
|
||||||
|
|
||||||
|
new MergeFilteredTest("noFilters",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), "."),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), "."),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), "."),
|
||||||
|
VariantContextUtils.MERGE_INTERSECTION);
|
||||||
|
|
||||||
|
new MergeFilteredTest("oneFiltered",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), "."),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), "."),
|
||||||
|
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||||
|
|
||||||
|
new MergeFilteredTest("onePassOneFail",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||||
|
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||||
|
|
||||||
|
new MergeFilteredTest("AllFiltered",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
|
||||||
|
VariantContextUtils.MERGE_FILTER_IN_ALL);
|
||||||
|
|
||||||
|
// test ALL vs. ANY
|
||||||
|
new MergeFilteredTest("FailOneUnfiltered",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), "."),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), "."),
|
||||||
|
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
|
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||||
|
|
||||||
|
new MergeFilteredTest("OneFailAllUnfilteredArg",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), "."),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
|
||||||
|
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED,
|
||||||
|
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||||
|
|
||||||
|
// test excluding allele in filtered record
|
||||||
|
new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), "."),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), "."),
|
||||||
|
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
|
||||||
|
|
||||||
|
// promotion of site from unfiltered to PASSES
|
||||||
|
new MergeFilteredTest("UnfilteredPlusPassIsPass",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), "."),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||||
|
VariantContextUtils.MERGE_INTERSECTION);
|
||||||
|
|
||||||
|
new MergeFilteredTest("RefInAll",
|
||||||
|
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
|
||||||
|
makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
|
||||||
|
makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
|
||||||
|
VariantContextUtils.MERGE_REF_IN_ALL);
|
||||||
|
|
||||||
|
new MergeFilteredTest("RefInOne",
|
||||||
|
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
|
||||||
|
"2");
|
||||||
|
|
||||||
|
return MergeFilteredTest.getTests(MergeFilteredTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "mergeFiltered")
|
||||||
|
public void testMergeFiltered(MergeFilteredTest cfg) {
|
||||||
|
final List<String> priority = vcs2priority(cfg.inputs);
|
||||||
|
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
|
||||||
|
cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||||
|
|
||||||
|
// test alleles are equal
|
||||||
|
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
|
||||||
|
|
||||||
|
// test set field
|
||||||
|
Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected);
|
||||||
|
|
||||||
|
// test filter field
|
||||||
|
Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters());
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Test genotype merging
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class MergeGenotypesTest extends TestDataProvider {
|
||||||
|
List<VariantContext> inputs;
|
||||||
|
VariantContext expected;
|
||||||
|
List<String> priority;
|
||||||
|
|
||||||
|
private MergeGenotypesTest(String name, String priority, VariantContext... arg) {
|
||||||
|
super(MergeGenotypesTest.class, name);
|
||||||
|
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(arg));
|
||||||
|
this.expected = all.pollLast();
|
||||||
|
inputs = all;
|
||||||
|
this.priority = Arrays.asList(priority.split(","));
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("%s input=%s expected=%s", super.toString(), inputs, expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "mergeGenotypes")
|
||||||
|
public Object[][] mergeGenotypesData() {
|
||||||
|
new MergeGenotypesTest("TakeGenotypeByPriority-1,2", "1,2",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)));
|
||||||
|
|
||||||
|
new MergeGenotypesTest("TakeGenotypeByPriority-1,2-nocall", "1,2",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)));
|
||||||
|
|
||||||
|
new MergeGenotypesTest("TakeGenotypeByPriority-2,1", "2,1",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)));
|
||||||
|
|
||||||
|
new MergeGenotypesTest("NonOverlappingGenotypes", "1,2",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, T, 2)));
|
||||||
|
|
||||||
|
new MergeGenotypesTest("PreserveNoCall", "1,2",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1), makeG("s2", Aref, T, 2)));
|
||||||
|
|
||||||
|
new MergeGenotypesTest("PerserveAlleles", "1,2",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)),
|
||||||
|
makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2)));
|
||||||
|
|
||||||
|
new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s3", Aref, T, 3)));
|
||||||
|
|
||||||
|
new MergeGenotypesTest("TakeGenotypePartialOverlap-2,1", "2,1",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)));
|
||||||
|
|
||||||
|
// merging genothpes with PLs
|
||||||
|
new MergeGenotypesTest("TakeGenotypePartialOverlapWithPLs-2,1", "2,1",
|
||||||
|
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1,5,0,3)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)),
|
||||||
|
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)));
|
||||||
|
|
||||||
|
new MergeGenotypesTest("TakeGenotypePartialOverlapWithPLs-1,2", "1,2",
|
||||||
|
makeVC("1", Arrays.asList(Aref,ATC), makeG("s1", Aref, ATC, 1,5,0,3)),
|
||||||
|
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)),
|
||||||
|
// no likelihoods on result since type changes to mixed multiallelic
|
||||||
|
makeVC("3", Arrays.asList(Aref, ATC, T), makeG("s1", Aref, ATC, 1), makeG("s3", Aref, T, 3)));
|
||||||
|
|
||||||
|
return MergeGenotypesTest.getTests(MergeGenotypesTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "mergeGenotypes")
|
||||||
|
public void testMergeGenotypes(MergeGenotypesTest cfg) {
|
||||||
|
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
|
||||||
|
cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
|
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||||
|
|
||||||
|
// test alleles are equal
|
||||||
|
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
|
||||||
|
|
||||||
|
// test genotypes
|
||||||
|
assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes());
|
||||||
|
}
|
||||||
|
|
||||||
|
// necessary to not overload equals for genotypes
|
||||||
|
private void assertGenotypesAreMostlyEqual(Map<String, Genotype> actual, Map<String, Genotype> expected) {
|
||||||
|
if (actual == expected) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (actual == null || expected == null) {
|
||||||
|
Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (actual.size() != expected.size()) {
|
||||||
|
Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Map.Entry<String, Genotype> entry : actual.entrySet()) {
|
||||||
|
String key = entry.getKey();
|
||||||
|
Genotype value = entry.getValue();
|
||||||
|
Genotype expectedValue = expected.get(key);
|
||||||
|
|
||||||
|
Assert.assertEquals(value.alleles, expectedValue.alleles);
|
||||||
|
Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError());
|
||||||
|
Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods());
|
||||||
|
if ( value.hasLikelihoods() )
|
||||||
|
Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMergeGenotypesUniquify() {
|
||||||
|
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
|
||||||
|
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));
|
||||||
|
|
||||||
|
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
|
||||||
|
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
|
VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
|
||||||
|
|
||||||
|
// test genotypes
|
||||||
|
Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expectedExceptions = UserException.class)
|
||||||
|
public void testMergeGenotypesRequireUnique() {
|
||||||
|
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
|
||||||
|
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));
|
||||||
|
|
||||||
|
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
|
||||||
|
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
|
VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Misc. tests
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAnnotationSet() {
|
||||||
|
for ( final boolean annotate : Arrays.asList(true, false)) {
|
||||||
|
for ( final String set : Arrays.asList("set", "combine", "x")) {
|
||||||
|
final List<String> priority = Arrays.asList("1", "2");
|
||||||
|
VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
|
||||||
|
VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
|
||||||
|
|
||||||
|
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
|
||||||
|
Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
|
VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false);
|
||||||
|
|
||||||
|
if ( annotate )
|
||||||
|
Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION);
|
||||||
|
else
|
||||||
|
Assert.assertFalse(merged.hasAttribute(set));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final List<String> vcs2priority(final Collection<VariantContext> vcs) {
|
||||||
|
final List<String> priority = new ArrayList<String>();
|
||||||
|
|
||||||
|
for ( final VariantContext vc : vcs ) {
|
||||||
|
priority.add(vc.getSource());
|
||||||
|
}
|
||||||
|
|
||||||
|
return priority;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<!-- Recalibration analysis script -->
|
<!-- Recalibration analysis script -->
|
||||||
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.*" />
|
<package name="org.broadinstitute.sting.gatk.walkers.recalibration" />
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</executable>
|
</executable>
|
||||||
<resources>
|
<resources>
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@
|
||||||
<!-- Core walkers -->
|
<!-- Core walkers -->
|
||||||
<package name="org.broadinstitute.sting.gatk.walkers.**" />
|
<package name="org.broadinstitute.sting.gatk.walkers.**" />
|
||||||
<!-- All non-oneoff GATK-specific RODs -->
|
<!-- All non-oneoff GATK-specific RODs -->
|
||||||
<package name="org.broadinstitute.sting.gatk.refdata.**" />
|
<package name="org.broadinstitute.sting.utils.codecs.**" />
|
||||||
<!-- Filters -->
|
<!-- Filters -->
|
||||||
<package name="org.broadinstitute.sting.gatk.filters" />
|
<package name="org.broadinstitute.sting.gatk.filters" />
|
||||||
<!-- Tribble codecs -->
|
<!-- Tribble codecs -->
|
||||||
|
|
|
||||||
|
|
@ -221,7 +221,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
this.max_deletion_fraction = qscript.deletions
|
this.max_deletion_fraction = qscript.deletions
|
||||||
this.out = t.rawVCF
|
this.out = t.rawVCF
|
||||||
this.glm = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
|
this.glm = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
|
||||||
this.baq = if (noBAQ) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY}
|
this.baq = if (noBAQ || t.isExome) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY}
|
||||||
this.analysisName = t.name + "_UGs"
|
this.analysisName = t.name + "_UGs"
|
||||||
this.jobName = queueLogDir + t.name + ".snpcall"
|
this.jobName = queueLogDir + t.name + ".snpcall"
|
||||||
}
|
}
|
||||||
|
|
@ -266,13 +266,18 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
|
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
|
||||||
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
|
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
|
||||||
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
|
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
|
||||||
if(t.nSamples >= 10) {
|
if(t.nSamples >= 10) { // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
|
||||||
this.use_annotation ++= List("InbreedingCoeff")
|
this.use_annotation ++= List("InbreedingCoeff")
|
||||||
}
|
}
|
||||||
if(!t.isExome) {
|
if(!t.isExome) {
|
||||||
this.use_annotation ++= List("DP")
|
this.use_annotation ++= List("DP")
|
||||||
} else {
|
} else { // exome specific parameters
|
||||||
|
this.resource :+= new TaggedFile( badSites_1000G, "bad=true,prior=2.0" )
|
||||||
this.mG = 6
|
this.mG = 6
|
||||||
|
if(t.nSamples <= 3) { // very few exome samples means very few variants
|
||||||
|
this.mG = 4
|
||||||
|
this.percentBad = 0.04
|
||||||
|
}
|
||||||
}
|
}
|
||||||
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile }
|
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile }
|
||||||
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
|
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue