Merge branch 'master' of ssh://gsa3.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Khalid Shakir 2011-09-27 00:51:45 -04:00
commit 77ba59e30a
83 changed files with 1645 additions and 822 deletions

View File

@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
*/
public FilePointer next() {
FilePointer current = wrappedIterator.next();
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
current = current.combine(parser,wrappedIterator.next());
return current;
}

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
* @author Your Name
* @since Date created
*/
@Hidden
public class GATKDocsExample extends RodWalker<Integer, Integer> {
/**
* Put detailed documentation about the argument here. No need to duplicate the summary information

View File

@ -24,12 +24,14 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
/**
* Tracks a linked list of GATKReportColumn in order by name.
*/
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>();
/**
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
columnNames.add(key);
return super.put(key, value);
}
@Override
public Iterator<GATKReportColumn> iterator() {
return new Iterator<GATKReportColumn>() {
int offset = 0;
public boolean hasNext() { return offset < columnNames.size() ; }
public GATKReportColumn next() { return getByIndex(offset++); }
public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
};
}
}

View File

@ -286,6 +286,10 @@ public class GATKReportTable {
}
}
public boolean containsKey(Object primaryKey) {
return primaryKeyColumn.contains(primaryKey);
}
/**
* Set the value for a given position in the table
*

View File

@ -43,6 +43,9 @@ import java.util.List;
import java.util.Map;
/**
* The allele balance (fraction of ref bases over ref + alt bases) across all bialleleic het-called samples
*/
public class AlleleBalance extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* The allele balance (fraction of ref bases over ref + alt bases) separately for each bialleleic het-called sample
*/
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {

View File

@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.Map;
/**
* Abstract base class for all annotations that are normalized by depth
*/
public abstract class AnnotationByDepth extends InfoFieldAnnotation {

View File

@ -47,6 +47,9 @@ import java.util.List;
import java.util.Map;
/**
* Count of A, C, G, T bases across all samples
*/
public class BaseCounts extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -13,6 +13,9 @@ import java.util.LinkedHashMap;
import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele)
*/
public class BaseQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }

View File

@ -44,6 +44,11 @@ import java.util.List;
import java.util.Map;
/**
* Allele count in genotypes, for each ALT allele, in the same order as listed;
* allele Frequency, for each ALT allele, in the same order as listed; total number
* of alleles in called genotypes.
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };

View File

@ -16,7 +16,23 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Total (unfiltered) depth over all samples.
*
* This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL.
*
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
* -dcov D is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -23,6 +23,25 @@ import java.util.List;
import java.util.Map;
/**
* The depth of coverage of each VCF allele in this sample.
*
* This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describe the total depth of reads that passed the Unified Genotypers internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
* the reads. If, for example, I believe there really is a an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
* assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what
* determine the genotype calls (see below).
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static String REF_ALLELE = "REF";

View File

@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
* being seen on only the forward or only the reverse strand) in the reads? More bias is
* indicative of false positive calls.
*/
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320;

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map;
/**
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
*/
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -34,12 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.genotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Consistency of the site with two (and only two) segregating haplotypes. Higher scores
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
*/
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10;

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map;
/**
* Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium
*/
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
private static final int MIN_SAMPLES = 10;

View File

@ -16,7 +16,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Largest contiguous homopolymer run of the variant allele in either direction on the reference.
*/
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
private boolean ANNOTATE_INDELS = true;

View File

@ -17,14 +17,15 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 5/16/11
*/
// A set of annotations calculated directly from the GLs
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
/**
* Likelihood-based (using PL field) test for the inbreeding among samples.
*
* A continuous generalization of the Hardy-Weinberg test for disequilibrium that works
* well with limited coverage per sample. See the 1000 Genomes Phase I release for
* more information.
*/
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
private static final int MIN_SAMPLES = 10;

View File

@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: Mar 11, 2011
* Time: 11:47:33 AM
* To change this template use File | Settings | File Templates.
* Rough category of indel type (insertion, deletion, multi-allelic, other)
*/
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map;
/**
* Triplet annotation: fraction of MAQP == 0, MAPQ < 10, and count of all mapped reads
*/
public class LowMQ extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -14,6 +14,9 @@ import java.util.LinkedHashMap;
import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
*/
public class MappingQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map;
/**
* Total count across all samples of mapping quality zero reads
*/
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,85 +1,81 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: asivache
* Date: Feb 4, 2011
* Time: 6:46:25 PM
* To change this template use File | Settings | File Templates.
*/
public class MappingQualityZeroBySample extends GenotypeAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker,
AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;
int mq0 = 0;
ReadBackedPileup pileup = null;
if (vc.isIndel() && context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
else return null;
if (pileup != null) {
for (PileupElement p : pileup ) {
if ( p.getMappingQual() == 0 )
mq0++;
}
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%d", mq0));
return map;
}
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(
new VCFFormatHeaderLine(getKeyNames().get(0), 1,
VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); }
}
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Count for each sample of mapping quality zero reads
*/
public class MappingQualityZeroBySample extends GenotypeAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker,
AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;
int mq0 = 0;
ReadBackedPileup pileup = null;
if (vc.isIndel() && context.hasExtendedEventPileup())
pileup = context.getExtendedEventPileup();
else if (context.hasBasePileup())
pileup = context.getBasePileup();
else return null;
if (pileup != null) {
for (PileupElement p : pileup ) {
if ( p.getMappingQual() == 0 )
mq0++;
}
}
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%d", mq0));
return map;
}
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }
public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(
new VCFFormatHeaderLine(getKeyNames().get(0), 1,
VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); }
}

View File

@ -17,8 +17,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Fraction of all reads across samples that have mapping quality zero
*/
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -17,11 +17,8 @@ import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 5/16/11
* The number of N bases, counting only SOLiD data
*/
public class NBaseCount extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if( stratifiedContexts.size() == 0 )

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -15,7 +16,11 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth.
*
* Low scores are indicative of false positive calls and artifacts.
*/
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,6 +21,9 @@ import java.util.List;
import java.util.Map;
/**
* Root Mean Square of the mapping quality of the reads across all samples.
*/
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,7 +21,9 @@ import java.util.List;
import java.util.Map;
/**
* Abstract root for all RankSum based annotations
*/
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
static final boolean DEBUG = false;

View File

@ -1,209 +1,207 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: asivache
* Date: Feb 4, 2011
* Time: 3:59:27 PM
* To change this template use File | Settings | File Templates.
*/
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
private static String REF_ALLELE = "REF";
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref,
AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;
if ( vc.isSNP() )
return annotateSNP(stratifiedContext, vc);
if ( vc.isIndel() )
return annotateIndel(stratifiedContext, vc);
return null;
}
private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {
if ( ! stratifiedContext.hasBasePileup() ) return null;
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlternateAlleles() )
alleleCounts.put(allele.getBases()[0], 0);
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
int totalDepth = pileup.size();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!!
int mq0 = 0; // number of "ref" reads that are acually mq0
for ( PileupElement p : pileup ) {
if ( p.getMappingQual() == 0 ) {
mq0++;
continue;
}
if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt
alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
}
if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do
// we need to add counts in the correct order
String[] fracs = new String[alleleCounts.size()];
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0));
}
map.put(getKeyNames().get(1), fracs);
return map;
}
private Map<String,Object> annotateIndel(AlignmentContext
stratifiedContext, VariantContext
vc) {
if ( ! stratifiedContext.hasExtendedEventPileup() ) {
return null;
}
ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
if ( pileup == null )
return null;
int totalDepth = pileup.size();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
if ( totalDepth == 0 ) return map;
int mq0 = 0; // number of "ref" reads that are acually mq0
HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
Allele refAllele = vc.getReference();
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
}
for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
if ( e.getMappingQual() == 0 ) {
mq0++;
continue;
}
if ( e.isInsertion() ) {
final String b = e.getEventBases();
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
} else {
if ( e.isDeletion() ) {
if ( e.getEventLength() == refAllele.length() ) {
// this is indeed the deletion allele recorded in VC
final String b = DEL;
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
}
// else {
// System.out.print(" deletion of WRONG length found");
// }
}
}
}
if ( mq0 == totalDepth ) return map;
String[] fracs = new String[alleleCounts.size()];
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
fracs[i] = String.format("%.3f",
((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0));
map.put(getKeyNames().get(1), fracs);
//map.put(getKeyNames().get(0), counts);
return map;
}
private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
return DEL;
} else { // insertion, pass actual bases
return allele.getBaseString();
}
}
// public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList("DP","FA"); }
public List<VCFFormatHeaderLine> getDescriptions() {
return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0),
1,
VCFHeaderLineType.Integer,
"Total read depth per sample, including MQ0"),
new VCFFormatHeaderLine(getKeyNames().get(1),
VCFHeaderLineCount.UNBOUNDED,
VCFHeaderLineType.Float,
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
}
}
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Unsupported
*/
@Hidden
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
private static String REF_ALLELE = "REF";
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref,
AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;
if ( vc.isSNP() )
return annotateSNP(stratifiedContext, vc);
if ( vc.isIndel() )
return annotateIndel(stratifiedContext, vc);
return null;
}
private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {
if ( ! stratifiedContext.hasBasePileup() ) return null;
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlternateAlleles() )
alleleCounts.put(allele.getBases()[0], 0);
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
int totalDepth = pileup.size();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!!
int mq0 = 0; // number of "ref" reads that are acually mq0
for ( PileupElement p : pileup ) {
if ( p.getMappingQual() == 0 ) {
mq0++;
continue;
}
if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt
alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
}
if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do
// we need to add counts in the correct order
String[] fracs = new String[alleleCounts.size()];
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0));
}
map.put(getKeyNames().get(1), fracs);
return map;
}
private Map<String,Object> annotateIndel(AlignmentContext
stratifiedContext, VariantContext
vc) {
if ( ! stratifiedContext.hasExtendedEventPileup() ) {
return null;
}
ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
if ( pileup == null )
return null;
int totalDepth = pileup.size();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
if ( totalDepth == 0 ) return map;
int mq0 = 0; // number of "ref" reads that are acually mq0
HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
Allele refAllele = vc.getReference();
for ( Allele allele : vc.getAlternateAlleles() ) {
if ( allele.isNoCall() ) {
continue; // this does not look so good, should we die???
}
alleleCounts.put(getAlleleRepresentation(allele), 0);
}
for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
if ( e.getMappingQual() == 0 ) {
mq0++;
continue;
}
if ( e.isInsertion() ) {
final String b = e.getEventBases();
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
} else {
if ( e.isDeletion() ) {
if ( e.getEventLength() == refAllele.length() ) {
// this is indeed the deletion allele recorded in VC
final String b = DEL;
if ( alleleCounts.containsKey(b) ) {
alleleCounts.put(b, alleleCounts.get(b)+1);
}
}
// else {
// System.out.print(" deletion of WRONG length found");
// }
}
}
}
if ( mq0 == totalDepth ) return map;
String[] fracs = new String[alleleCounts.size()];
for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
fracs[i] = String.format("%.3f",
((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0));
map.put(getKeyNames().get(1), fracs);
//map.put(getKeyNames().get(0), counts);
return map;
}
private String getAlleleRepresentation(Allele allele) {
if ( allele.isNull() ) { // deletion wrt the ref
return DEL;
} else { // insertion, pass actual bases
return allele.getBaseString();
}
}
// public String getIndelBases()
public List<String> getKeyNames() { return Arrays.asList("DP","FA"); }
public List<VCFFormatHeaderLine> getDescriptions() {
return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0),
1,
VCFHeaderLineType.Integer,
"Total read depth per sample, including MQ0"),
new VCFFormatHeaderLine(getKeyNames().get(1),
VCFHeaderLineCount.UNBOUNDED,
VCFHeaderLineType.Float,
"Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
}
}

View File

@ -19,11 +19,8 @@ import java.util.LinkedHashMap;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 3/30/11
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
*/
public class ReadPosRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }

View File

@ -15,8 +15,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* SB annotation value by depth of alt containing samples
*/
public class SBByDepth extends AnnotationByDepth {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -41,7 +41,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* List all of the samples in the info field
*/
public class SampleList extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -82,7 +82,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
EXON_ID_KEY ("SNPEFF_EXON_ID", 7);
EXON_ID_KEY ("SNPEFF_EXON_ID", 7),
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1);
// Actual text of the key
private final String keyName;
@ -108,48 +109,73 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
// Possible SnpEff biological effects. All effect names found in the SnpEff input file
// are validated against this list.
public enum EffectType {
NONE,
CHROMOSOME,
INTERGENIC,
UPSTREAM,
UTR_5_PRIME,
UTR_5_DELETED,
START_GAINED,
SPLICE_SITE_ACCEPTOR,
SPLICE_SITE_DONOR,
START_LOST,
SYNONYMOUS_START,
NON_SYNONYMOUS_START,
CDS,
GENE,
TRANSCRIPT,
EXON,
EXON_DELETED,
NON_SYNONYMOUS_CODING,
SYNONYMOUS_CODING,
FRAME_SHIFT,
CODON_CHANGE,
CODON_INSERTION,
CODON_CHANGE_PLUS_CODON_INSERTION,
CODON_DELETION,
CODON_CHANGE_PLUS_CODON_DELETION,
STOP_GAINED,
SYNONYMOUS_STOP,
NON_SYNONYMOUS_STOP,
STOP_LOST,
INTRON,
UTR_3_PRIME,
UTR_3_DELETED,
DOWNSTREAM,
INTRON_CONSERVED,
INTERGENIC_CONSERVED,
REGULATION,
CUSTOM,
WITHIN_NON_CODING_GENE
// High-impact effects:
FRAME_SHIFT (EffectFunctionalClass.NONE, false),
STOP_GAINED (EffectFunctionalClass.NONSENSE, false),
START_LOST (EffectFunctionalClass.NONE, false),
SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false),
SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false),
EXON_DELETED (EffectFunctionalClass.NONE, false),
STOP_LOST (EffectFunctionalClass.NONE, false),
// Moderate-impact effects:
NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false),
CODON_CHANGE (EffectFunctionalClass.NONE, false),
CODON_INSERTION (EffectFunctionalClass.NONE, false),
CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false),
CODON_DELETION (EffectFunctionalClass.NONE, false),
CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false),
UTR_5_DELETED (EffectFunctionalClass.NONE, false),
UTR_3_DELETED (EffectFunctionalClass.NONE, false),
// Low-impact effects:
SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false),
SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
START_GAINED (EffectFunctionalClass.NONE, false),
// Modifiers:
NONE (EffectFunctionalClass.NONE, true),
CHROMOSOME (EffectFunctionalClass.NONE, true),
INTERGENIC (EffectFunctionalClass.NONE, true),
UPSTREAM (EffectFunctionalClass.NONE, true),
UTR_5_PRIME (EffectFunctionalClass.NONE, true),
CDS (EffectFunctionalClass.NONE, true),
GENE (EffectFunctionalClass.NONE, true),
TRANSCRIPT (EffectFunctionalClass.NONE, true),
EXON (EffectFunctionalClass.NONE, true),
INTRON (EffectFunctionalClass.NONE, true),
UTR_3_PRIME (EffectFunctionalClass.NONE, true),
DOWNSTREAM (EffectFunctionalClass.NONE, true),
INTRON_CONSERVED (EffectFunctionalClass.NONE, true),
INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true),
REGULATION (EffectFunctionalClass.NONE, true),
CUSTOM (EffectFunctionalClass.NONE, true),
WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true);
private final EffectFunctionalClass functionalClass;
private final boolean isModifier;
EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) {
this.functionalClass = functionalClass;
this.isModifier = isModifier;
}
public EffectFunctionalClass getFunctionalClass() {
return functionalClass;
}
public boolean isModifier() {
return isModifier;
}
}
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact.
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of
// classifying some of the LOW impact effects as MODIFIERs.
public enum EffectImpact {
MODIFIER (0),
LOW (1),
MODERATE (2),
HIGH (3);
@ -163,6 +189,10 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
public boolean isHigherImpactThan ( EffectImpact other ) {
return this.severityRating > other.severityRating;
}
public boolean isSameImpactAs ( EffectImpact other ) {
return this.severityRating == other.severityRating;
}
}
// SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information.
@ -172,6 +202,24 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
UNKNOWN
}
// We assign a functional class to each SnpEff effect.
public enum EffectFunctionalClass {
NONE (0),
SILENT (1),
MISSENSE (2),
NONSENSE (3);
private final int priority;
EffectFunctionalClass ( int priority ) {
this.priority = priority;
}
public boolean isHigherPriorityThan ( EffectFunctionalClass other ) {
return this.priority > other.priority;
}
}
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
// without providing a SnpEff rod via --snpEffFile):
@ -336,7 +384,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
InfoFieldKey.EXON_ID_KEY.getKeyName()
InfoFieldKey.EXON_ID_KEY.getKeyName(),
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
);
}
@ -349,7 +398,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
);
}
@ -411,11 +461,16 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
return;
}
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
if ( effect != null && effect.isModifier() ) {
impact = EffectImpact.MODIFIER;
}
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
else {
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
}
}
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
@ -472,9 +527,17 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
}
// Otherwise, both effects are either in or not in a coding gene, so we compare the impacts
// of the effects themselves:
// of the effects themselves. Effects with the same impact are tie-broken using the
// functional class of the effect:
return impact.isHigherImpactThan(other.impact);
if ( impact.isHigherImpactThan(other.impact) ) {
return true;
}
else if ( impact.isSameImpactAs(other.impact) ) {
return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
}
return false;
}
public Map<String, Object> getAnnotations() {
@ -488,6 +551,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());
return annotations;
}

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map;
/**
* Fraction of reads containing spanning deletions at this site.
*/
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -19,12 +20,9 @@ import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: 6/29/11
* Time: 3:14 PM
* To change this template use File | Settings | File Templates.
* Counts of bases from SLX, 454, and SOLiD at this site
*/
@Hidden
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
private String nSLX = "NumSLX";
private String n454 ="Num454";

View File

@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false;
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
public String familyStr = null;
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
public double minGenotypeQualityP = 0.0;
private VariantAnnotatorEngine engine;
private Collection<VariantContext> indelBufferContext;

View File

@ -23,7 +23,7 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.genotype;
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
* Time: 6:46:09 PM
* To change this template use File | Settings | File Templates.
*/
public enum DiploidGenotype {
enum DiploidGenotype {
AA ('A', 'A'),
AC ('A', 'C'),
AG ('A', 'G'),

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
/**
* Created by IntelliJ IDEA.

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import java.util.Arrays;

View File

@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// code for testing purposes
//
private final static boolean DEBUG = false;
private final static boolean PRINT_LIKELIHOODS = false;
private final static int N_CYCLES = 1;
private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded");
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
private final static boolean COMPARE_TO_GS = false;
public enum ExactCalculation {
N2_GOLD_STANDARD,
LINEAR_EXPERIMENTAL
}
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private boolean SIMPLE_GREEDY_GENOTYPER = false;
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
final private ExactCalculation calcToUse;
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter);
calcToUse = UAC.EXACT_CALCULATION_TYPE;
}
public void getLog10PNonRef(RefMetaDataTracker tracker,
@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
Map<String, Genotype> GLs, Set<Allele>alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
double[] gsPosteriors;
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
int idxAA = GenotypeType.AA.ordinal();
int idxAB = GenotypeType.AB.ordinal();
int idxBB = GenotypeType.BB.ordinal();
// todo -- remove me after testing
if ( N_CYCLES > 1 ) {
for ( int i = 0; i < N_CYCLES; i++) {
timerGS.restart();
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB);
timerGS.stop();
timerExpt.restart();
linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
timerExpt.stop();
}
System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n",
timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime());
}
int lastK = -1;
int numAlleles = alleles.size();
final int numAlleles = alleles.size();
final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
int idxDiag = numAlleles;
int incr = numAlleles - 1;
double[][] posteriorCache = new double[numAlleles-1][];
double[] bestAFguess = new double[numAlleles-1];
for (int k=1; k < numAlleles; k++) {
// multi-allelic approximation, part 1: Ideally
// for each alt allele compute marginal (suboptimal) posteriors -
@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
idxAA = 0;
idxAB = k;
final int idxAA = 0;
final int idxAB = k;
// yy is always element on the diagonal.
// 2 alleles: BBelement 2
// 3 alleles: BB element 3. CC element 5
// 4 alleles:
idxBB = idxDiag;
final int idxBB = idxDiag;
idxDiag += incr--;
// todo - possible cleanup
switch ( calcToUse ) {
case N2_GOLD_STANDARD:
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
case LINEAR_EXPERIMENTAL:
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
}
final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
if (numAlleles > 2) {
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
@ -153,39 +100,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
}
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
if ( COMPARE_TO_GS ) {
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB);
double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0;
if ( ! eq || PRINT_LIKELIHOODS ) {
System.out.printf("----------------------------------------%n");
for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
double x = log10AlleleFrequencyPosteriors[k];
System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k],
log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
}
System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar);
}
}
}
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
//int j = 0;
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.values() ) {
if ( sample.hasLikelihoods() ) {
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
double[] gls = sample.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
@ -240,84 +162,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
}
// now with banding
public int linearExactBanded(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
throw new NotImplementedException();
// final int numSamples = GLs.size();
// final int numChr = 2*numSamples;
// final double[][] genotypeLikelihoods = getGLs(GLs);
//
// final ExactACCache logY = new ExactACCache(numSamples+1);
// logY.getkMinus0()[0] = 0.0; // the zero case
//
// double maxLog10L = Double.NEGATIVE_INFINITY;
// boolean done = false;
// int lastK = -1;
// final int BAND_SIZE = 10;
//
// for (int k=0; k <= numChr && ! done; k++ ) {
// final double[] kMinus0 = logY.getkMinus0();
// int jStart = Math.max(k - BAND_SIZE, 1);
// int jStop = Math.min(k + BAND_SIZE, numSamples);
//
// if ( k == 0 ) { // special case for k = 0
// for ( int j=1; j <= numSamples; j++ ) {
// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
// }
// } else { // k > 0
// final double[] kMinus1 = logY.getkMinus1();
// final double[] kMinus2 = logY.getkMinus2();
// Arrays.fill(kMinus0,0);
//
// for ( int j = jStart; j <= jStop; j++ ) {
// final double[] gl = genotypeLikelihoods[j];
// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
//
// double aa = Double.NEGATIVE_INFINITY;
// double ab = Double.NEGATIVE_INFINITY;
// if (k < 2*j-1)
// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
//
// if (k < 2*j)
// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
//
// double log10Max;
// if (k > 1) {
// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
// log10Max = approximateLog10SumLog10(aa, ab, bb);
// } else {
// // we know we aren't considering the BB case, so we can use an optimized log10 function
// log10Max = approximateLog10SumLog10(aa, ab);
// }
//
// // finally, update the L(j,k) value
// kMinus0[j] = log10Max - logDenominator;
//
// String offset = Utils.dupString(' ',k);
// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]);
// }
// }
//
// // update the posteriors vector
// final double log10LofK = kMinus0[jStop];
// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
//
// // can we abort early?
// lastK = k;
// maxLog10L = Math.max(maxLog10L, log10LofK);
// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
// done = true;
// }
//
// logY.rotate();
// }
//
// return lastK;
}
public int linearExact(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
@ -605,82 +449,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
return calls;
}
// -------------------------------------------------------------------------------------
//
// Gold standard, but O(N^2), implementation.
//
// TODO -- remove me for clarity in this code
//
// -------------------------------------------------------------------------------------
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
int numSamples = GLs.size();
int numChr = 2*numSamples;
double[][] logYMatrix = new double[1+numSamples][1+numChr];
for (int i=0; i <=numSamples; i++)
for (int j=0; j <=numChr; j++)
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
//YMatrix[0][0] = 1.0;
logYMatrix[0][0] = 0.0;
int j=0;
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
j++;
if ( !sample.getValue().hasLikelihoods() )
continue;
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
//double logDenominator = Math.log10(2.0*j*(2.0*j-1));
double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
// special treatment for k=0: iteration reduces to:
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA];
for (int k=1; k <= 2*j; k++ ) {
//double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
double logNumerator[];
logNumerator = new double[3];
if (k < 2*j-1)
logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
genotypeLikelihoods[idxAA];
else
logNumerator[0] = Double.NEGATIVE_INFINITY;
if (k < 2*j)
logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
genotypeLikelihoods[idxAB];
else
logNumerator[1] = Double.NEGATIVE_INFINITY;
if (k > 1)
logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
genotypeLikelihoods[idxBB];
else
logNumerator[2] = Double.NEGATIVE_INFINITY;
double logNum = MathUtils.softMax(logNumerator);
//YMatrix[j][k] = num/den;
logYMatrix[j][k] = logNum - logDenominator;
}
}
for (int k=0; k <= numChr; k++)
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
return numChr;
}
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
int j = logYMatrix.length - 1;
System.out.printf("-----------------------------------%n");
@ -689,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
}
}
}

View File

@ -34,10 +34,9 @@ import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.genotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@ -396,8 +395,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
//System.out.println(eventLength);
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
ref, hsize, numPrefBases);
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
ref, hsize, numPrefBases);
// For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.

View File

@ -31,9 +31,9 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@ -122,8 +122,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
aList.add(refAllele);
aList.add(altAllele);
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
// normalize in log space so that max element is zero.
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
aList, dlike, getFilteredDepth(pileup)));
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
}
return refAllele;

View File

@ -168,10 +168,6 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
public boolean GSA_PRODUCTION_ONLY = false;
@Hidden
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
@Hidden
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false;
@ -191,7 +187,6 @@ public class UnifiedArgumentCollection {
uac.GLmodel = GLmodel;
uac.AFmodel = AFmodel;
uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE;
uac.heterozygosity = heterozygosity;
uac.PCR_error = PCR_error;
uac.GenotypingMode = GenotypingMode;

View File

@ -544,6 +544,21 @@ public class UnifiedGenotyperEngine {
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
private final static double[] binomialProbabilityDepthCache = new double[10000];
static {
for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) {
binomialProbabilityDepthCache[i] = MathUtils.binomialProbability(0, i, 0.5);
}
}
private final double getRefBinomialProb(final int depth) {
if ( depth < binomialProbabilityDepthCache.length )
return binomialProbabilityDepthCache[depth];
else
return MathUtils.binomialProbability(0, depth, 0.5);
}
private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
if ( contexts == null )
return null;
@ -567,7 +582,7 @@ public class UnifiedGenotyperEngine {
depth = context.getExtendedEventPileup().size();
}
P_of_ref *= 1.0 - (theta / 2.0) * MathUtils.binomialProbability(0, depth, 0.5);
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
}
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);

View File

@ -26,9 +26,9 @@
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.genotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;

View File

@ -29,8 +29,8 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -1041,20 +1041,14 @@ public class PairHMMIndelErrorModel {
double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
int k=0;
double maxElement = Double.NEGATIVE_INFINITY;
for (int j=0; j < hSize; j++) {
for (int i=0; i <= j; i++){
genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
if (haplotypeLikehoodMatrix[i][j] > maxElement)
maxElement = haplotypeLikehoodMatrix[i][j];
}
}
// renormalize
for (int i=0; i < genotypeLikelihoods.length; i++)
genotypeLikelihoods[i] -= maxElement;
return genotypeLikelihoods;
// renormalize so that max element is zero.
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
}
/**

View File

@ -42,6 +42,7 @@ import java.util.*;
*
* <p>Body test</p>
*/
@Hidden
public class DocumentationTest extends RodWalker<Integer, Integer> {
// the docs for the arguments are in the collection
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

View File

@ -62,14 +62,17 @@ public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker t
annotationId++;
} while (eval.hasAttribute(key));
} else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName() ) ) {
SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName()).toString());
if ( snpEffType == SnpEff.EffectType.STOP_GAINED )
type = FunctionalType.nonsense;
else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING )
type = FunctionalType.missense;
else if ( snpEffType == SnpEff.EffectType.SYNONYMOUS_CODING )
type = FunctionalType.silent;
} else if ( eval.hasAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()) ) {
try {
SnpEff.EffectFunctionalClass snpEffFunctionalClass = SnpEff.EffectFunctionalClass.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()).toString());
if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.NONSENSE )
type = FunctionalType.nonsense;
else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.MISSENSE )
type = FunctionalType.missense;
else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.SILENT )
type = FunctionalType.silent;
}
catch ( Exception e ) {} // don't error out if the type isn't supported
}
if ( type != null ) {

View File

@ -233,49 +233,18 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
return 0;
List<VariantContext> preMergedVCs = new ArrayList<VariantContext>();
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
// iterate over the types so that it's deterministic
for ( VariantContext.Type type : VariantContext.Type.values() ) {
if ( VCsByType.containsKey(type) )
preMergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
// se have records merged but separated by type. If a particular record is for example a snp but all alleles are a subset of an existing mixed record,
// we will still merge those records.
if (preMergedVCs.size() > 1) {
for (VariantContext vc1 : preMergedVCs) {
VariantContext newvc = vc1;
boolean merged = false;
for (int k=0; k < mergedVCs.size(); k++) {
VariantContext vc2 = mergedVCs.get(k);
if (VariantContextUtils.allelesAreSubset(vc1,vc2) || VariantContextUtils.allelesAreSubset(vc2,vc1)) {
// all alleles of vc1 are contained in vc2 but they are of different type (say, vc1 is snp, vc2 is complex): try to merget v1 into v2
List<VariantContext> vcpair = new ArrayList<VariantContext>();
vcpair.add(vc1);
vcpair.add(vc2);
newvc = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcpair,
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
mergedVCs.set(k,newvc);
merged = true;
break;
}
}
if (!merged)
mergedVCs.add(vc1);
}
}
else {
mergedVCs = preMergedVCs;
}
for ( VariantContext mergedVC : mergedVCs ) {
for ( VariantContext mergedVC : mergedVCs ) {
// only operate at the start of events
if ( mergedVC == null )
continue;

View File

@ -452,7 +452,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); }
}
else
mvSet.add(new MendelianViolation(getToolkit(), MENDELIAN_VIOLATION_QUAL_THRESHOLD));
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));
}
else if (!FAMILY_STRUCTURE.isEmpty()) {
mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD));

View File

@ -22,10 +22,9 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.genotype;
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;

View File

@ -444,11 +444,25 @@ public class MathUtils {
* @return a newly allocated array corresponding the normalized values in array, maybe log10 transformed
*/
public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput) {
double[] normalized = new double[array.length];
return normalizeFromLog10(array, takeLog10OfOutput, false);
}
public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOutput, boolean keepInLogSpace) {
// for precision purposes, we need to add (or really subtract, since they're
// all negative) the largest value; also, we need to convert to normal-space.
double maxValue = Utils.findMaxEntry(array);
// we may decide to just normalize in log space with converting to linear space
if (keepInLogSpace) {
for (int i = 0; i < array.length; i++)
array[i] -= maxValue;
return array;
}
// default case: go to linear space
double[] normalized = new double[array.length];
for (int i = 0; i < array.length; i++)
normalized[i] = Math.pow(10, array[i] - maxValue);

View File

@ -1,11 +1,13 @@
package org.broadinstitute.sting.utils;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
@ -32,6 +34,9 @@ public class MendelianViolation {
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
public String getSampleMom() {
return sampleMom;
@ -168,4 +173,41 @@ public class MendelianViolation {
return true;
}
/**
* @return the likelihood ratio for a mendelian violation
*/
public double violationLikelihoodRatio(VariantContext vc) {
double[] logLikAssignments = new double[27];
// the matrix to set up is
// MOM DAD CHILD
// |- AA
// AA AA | AB
// |- BB
// |- AA
// AA AB | AB
// |- BB
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
int offset = 0;
for ( int oMom = 0; oMom < 3; oMom++ ) {
for ( int oDad = 0; oDad < 3; oDad++ ) {
for ( int oChild = 0; oChild < 3; oChild ++ ) {
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
}
}
}
double[] mvLiks = new double[12];
double[] nonMVLiks = new double[15];
for ( int i = 0; i < 12; i ++ ) {
mvLiks[i] = logLikAssignments[mvOffsets[i]];
}
for ( int i = 0; i < 15; i++) {
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
}
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
}
}

View File

@ -398,7 +398,9 @@ public class ClippingOp {
for (int i = 1; i <= 2; i++) {
int shift = 0;
int totalHardClip = 0;
boolean readHasStarted = false;
boolean addedHardClips = false;
while(!cigarStack.empty()) {
CigarElement cigarElement = cigarStack.pop();
@ -408,14 +410,33 @@ public class ClippingOp {
cigarElement.getOperator() != CigarOperator.DELETION &&
cigarElement.getOperator() != CigarOperator.HARD_CLIP)
readHasStarted = true;
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.HARD_CLIP)
totalHardClip += cigarElement.getLength();
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.INSERTION)
shift += cigarElement.getLength();
if (readHasStarted || cigarElement.getOperator() == CigarOperator.HARD_CLIP) {
if (i==1)
else if ( !readHasStarted && cigarElement.getOperator() == CigarOperator.DELETION)
totalHardClip += cigarElement.getLength();
if (readHasStarted) {
if (i==1) {
if (!addedHardClips) {
if (totalHardClip > 0)
inverseCigarStack.push(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
addedHardClips = true;
}
inverseCigarStack.push(cigarElement);
else
}
else {
if (!addedHardClips) {
if (totalHardClip > 0)
cleanCigar.add(new CigarElement(totalHardClip, CigarOperator.HARD_CLIP));
addedHardClips = true;
}
cleanCigar.add(cigarElement);
}
}
}
// first pass (i=1) is from end to start of the cigar elements
@ -432,39 +453,35 @@ public class ClippingOp {
}
private int calculateAlignmentStartShift(Cigar oldCigar, Cigar newCigar) {
int shift = 0;
int newShift = 0;
int oldShift = 0;
// Rewind to previous start (by counting everything that was already clipped in this read)
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (!cigarElement.getOperator().consumesReferenceBases())
shift -= cigarElement.getLength();
else
break;
}
// Advance to new start (by counting everything new that has been clipped )
for (CigarElement cigarElement : newCigar.getCigarElements()) {
if (!cigarElement.getOperator().consumesReferenceBases())
shift += cigarElement.getLength();
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
newShift += cigarElement.getLength();
else
break;
}
return shift;
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
else
break;
}
return newShift - oldShift;
}
private int calculateHardClippingAlignmentShift(CigarElement cigarElement, int clippedLength) {
if (cigarElement.getOperator() == CigarOperator.INSERTION) {
int cigarElementLength = cigarElement.getLength();
if (clippedLength >= cigarElementLength)
return -cigarElement.getLength();
else
return -clippedLength;
}
// Insertions should be discounted from the total hard clip count
if (cigarElement.getOperator() == CigarOperator.INSERTION)
return -clippedLength;
if (cigarElement.getOperator() == CigarOperator.DELETION)
// Deletions should be added to the total hard clip count
else if (cigarElement.getOperator() == CigarOperator.DELETION)
return cigarElement.getLength();
// There is no shift if we are not clipping an indel
return 0;
}

View File

@ -21,6 +21,8 @@ public enum ClippingRepresentation {
SOFTCLIP_BASES,
/**
* WARNING: THIS OPTION IS STILL UNDER DEVELOPMENT AND IS NOT SUPPORTED.
*
* Change the read's cigar string to hard clip (H, see sam-spec) away the bases.
* Hard clipping, unlike soft clipping, actually removes bases from the read,
* reducing the resulting file's size but introducing an irrevesible (i.e.,

View File

@ -69,21 +69,21 @@ public class ReadClipper {
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read))
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
// TODO add check in the Hardclip function
if ( start > stop )
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
if ( start > stop ) {
// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
}
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
stop -= numDeletions(read);
if ( start > stop )
start -= numDeletions(read);
// stop -= numDeletions(read);
// if ( start > stop )
// start -= numDeletions(read);
//System.out.println("Clipping start/stop: " + start + "/" + stop);
@ -128,6 +128,39 @@ public class ReadClipper {
return this.clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
public SAMRecord hardClipSoftClippedBases () {
int readIndex = 0;
int cutLeft = -1; // first position to hard clip (inclusive)
int cutRight = -1; // first position to hard clip (inclusive)
boolean rightTail = false; // trigger to stop clipping the left tail and start cutting the right tail
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
if (rightTail) {
cutRight = readIndex;
}
else {
cutLeft = readIndex + cigarElement.getLength() - 1;
}
}
else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
rightTail = true;
if (cigarElement.getOperator().consumesReadBases())
readIndex += cigarElement.getLength();
}
// It is extremely important that we cut the end first otherwise the read coordinates change.
if (cutRight >= 0)
this.addOp(new ClippingOp(cutRight, read.getReadLength() - 1));
if (cutLeft >= 0)
this.addOp(new ClippingOp(0, cutLeft));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
/**
* Return a new read corresponding to this.read that's been clipped according to ops, if any are present.
*
@ -150,4 +183,21 @@ public class ReadClipper {
}
}
}
public SAMRecord hardClipLeadingInsertions() {
for(CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP &&
cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION)
break;
else if (cigarElement.getOperator() == CigarOperator.INSERTION) {
this.addOp(new ClippingOp(0, cigarElement.getLength() - 1));
}
else if (cigarElement.getOperator() == CigarOperator.DELETION) {
throw new ReviewedStingException("No read should start with a deletion. Aligner bug?");
}
}
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
}

View File

@ -115,15 +115,21 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
}
arrayIndex++;
}
boolean sawFormatTag = false;
if ( arrayIndex < strings.length ) {
if ( !strings[arrayIndex].equals("FORMAT") )
throw new TribbleException.InvalidHeader("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
sawFormatTag = true;
arrayIndex++;
}
while (arrayIndex < strings.length)
while ( arrayIndex < strings.length )
auxTags.add(strings[arrayIndex++]);
if ( sawFormatTag && auxTags.size() == 0 )
throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data");
} else {
if ( str.startsWith("##INFO=") ) {
VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version);
@ -200,28 +206,24 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
* @return a VariantContext
*/
public Feature decode(String line) {
return reallyDecode(line);
}
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
private Feature reallyDecode(String line) {
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
// our header cannot be null, we need the genotype sample names and counts
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
// our header cannot be null, we need the genotype sample names and counts
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
if (parts == null)
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
if (parts == null)
parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
" tokens, and saw " + nParts + " )", lineNo);
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
" tokens, and saw " + nParts + " )", lineNo);
return parseVCFLine(parts);
return parseVCFLine(parts);
}
protected void generateException(String message) {

View File

@ -35,9 +35,6 @@ public class VCFHeader {
// the header string indicator
public static final String HEADER_INDICATOR = "#";
/** do we have genotying data? */
private boolean hasGenotypingData = false;
// were the input samples sorted originally (or are we sorting them)?
private boolean samplesWereAlreadySorted = true;
@ -57,17 +54,15 @@ public class VCFHeader {
* create a VCF header, given a list of meta data and auxillary tags
*
* @param metaData the meta data associated with this header
* @param genotypeSampleNames the genotype format field, and the sample names
* @param genotypeSampleNames the sample names
*/
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
mMetaData = new TreeSet<VCFHeaderLine>();
if ( metaData != null )
mMetaData.addAll(metaData);
for (String col : genotypeSampleNames) {
if (!col.equals("FORMAT"))
mGenotypeSampleNames.add(col);
}
if (genotypeSampleNames.size() > 0) hasGenotypingData = true;
mGenotypeSampleNames.addAll(genotypeSampleNames);
loadVCFVersion();
loadMetaDataMaps();
@ -157,7 +152,7 @@ public class VCFHeader {
* @return true if we have genotyping columns, false otherwise
*/
public boolean hasGenotypingData() {
return hasGenotypingData;
return mGenotypeSampleNames.size() > 0;
}
/**
@ -171,7 +166,7 @@ public class VCFHeader {
/** @return the column count */
public int getColumnCount() {
return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0);
return HEADER_FIELDS.values().length + (hasGenotypingData() ? mGenotypeSampleNames.size() + 1 : 0);
}
/**

View File

@ -174,6 +174,12 @@ public class UserException extends ReviewedStingException {
}
}
public static class MalformedVCFHeader extends UserException {
public MalformedVCFHeader(String message) {
super(String.format("The provided VCF file has a malformed header: %s", message));
}
}
public static class ReadMissingReadGroup extends MalformedBAM {
public ReadMissingReadGroup(SAMRecord read) {
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));

View File

@ -66,7 +66,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
public boolean includeInDocs(ClassDoc doc) {
try {
Class type = HelpUtils.getClassForDoc(doc);
return JVMUtils.isConcrete(type);
boolean hidden = ! getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class);
return ! hidden && JVMUtils.isConcrete(type);
} catch ( ClassNotFoundException e ) {
return false;
}

View File

@ -157,7 +157,7 @@ public class ReadUtils {
* |----------------| (interval)
* <--------> (read)
*/
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
/**
* God, there's a huge information asymmetry in SAM format:
@ -640,27 +640,35 @@ public class ReadUtils {
*/
public static ReadAndIntervalOverlap getReadAndIntervalOverlapType(SAMRecord read, GenomeLoc interval) {
int start = getRefCoordSoftUnclippedStart(read);
int stop = getRefCoordSoftUnclippedEnd(read);
int sStart = getRefCoordSoftUnclippedStart(read);
int sStop = getRefCoordSoftUnclippedEnd(read);
int uStart = read.getUnclippedStart();
int uStop = read.getUnclippedEnd();
if ( !read.getReferenceName().equals(interval.getContig()) )
return ReadAndIntervalOverlap.NO_OVERLAP_CONTIG;
else if ( stop < interval.getStart() )
else if ( uStop < interval.getStart() )
return ReadAndIntervalOverlap.NO_OVERLAP_LEFT;
else if ( start > interval.getStop() )
else if ( uStart > interval.getStop() )
return ReadAndIntervalOverlap.NO_OVERLAP_RIGHT;
else if ( (start >= interval.getStart()) &&
(stop <= interval.getStop()) )
else if ( sStop < interval.getStart() )
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_LEFT;
else if ( sStart > interval.getStop() )
return ReadAndIntervalOverlap.NO_OVERLAP_HARDCLIPPED_RIGHT;
else if ( (sStart >= interval.getStart()) &&
(sStop <= interval.getStop()) )
return ReadAndIntervalOverlap.OVERLAP_CONTAINED;
else if ( (start < interval.getStart()) &&
(stop > interval.getStop()) )
else if ( (sStart < interval.getStart()) &&
(sStop > interval.getStop()) )
return ReadAndIntervalOverlap.OVERLAP_LEFT_AND_RIGHT;
else if ( (start < interval.getStart()) )
else if ( (sStart < interval.getStart()) )
return ReadAndIntervalOverlap.OVERLAP_LEFT;
else
@ -787,5 +795,47 @@ public class ReadUtils {
return readBases;
}
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
int newReadStart = rec.getAlignmentStart();
int newReadEnd = rec.getAlignmentEnd();
List<CigarElement> newCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
int heldOver = -1;
boolean sSeen = false;
for ( CigarElement e : rec.getCigar().getCigarElements() ) {
if ( e.getOperator().equals(CigarOperator.S) ) {
newCigarElements.add(new CigarElement(e.getLength(),CigarOperator.M));
if ( sSeen ) {
newReadEnd += e.getLength();
sSeen = true;
} else {
newReadStart -= e.getLength();
}
} else {
newCigarElements.add(e);
}
}
// merge duplicate operators together
int idx = 0;
List<CigarElement> finalCigarElements = new ArrayList<CigarElement>(rec.getCigar().getCigarElements().size());
while ( idx < newCigarElements.size() -1 ) {
if ( newCigarElements.get(idx).getOperator().equals(newCigarElements.get(idx+1).getOperator()) ) {
int combSize = newCigarElements.get(idx).getLength();
int offset = 0;
while ( idx + offset < newCigarElements.size()-1 && newCigarElements.get(idx+offset).getOperator().equals(newCigarElements.get(idx+1+offset).getOperator()) ) {
combSize += newCigarElements.get(idx+offset+1).getLength();
offset++;
}
finalCigarElements.add(new CigarElement(combSize,newCigarElements.get(idx).getOperator()));
idx = idx + offset -1;
} else {
finalCigarElements.add(newCigarElements.get(idx));
}
idx++;
}
rec.setCigar(new Cigar(finalCigarElements));
rec.setAlignmentStart(newReadStart);
return rec;
}
}

View File

@ -26,14 +26,31 @@ public class Genotype {
protected boolean filtersWereAppliedToContext;
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
this(sampleName, alleles, negLog10PError, filters, attributes, isPhased, null);
}
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased, double[] log10Likelihoods) {
if ( alleles != null )
this.alleles = Collections.unmodifiableList(alleles);
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
if ( log10Likelihoods != null )
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
filtersWereAppliedToContext = filters != null;
this.isPhased = isPhased;
validate();
}
/**
* Creates a new Genotype for sampleName with genotype according to alleles.
* @param sampleName
* @param alleles
* @param negLog10PError the confidence in these alleles
* @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
*/
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, double[] log10Likelihoods) {
this(sampleName, alleles, negLog10PError, null, null, false, log10Likelihoods);
}
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError) {
this(sampleName, alleles, negLog10PError, null, null, false);
}
@ -57,13 +74,6 @@ public class Genotype {
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
}
public static Genotype removePLs(Genotype g) {
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
}
public static Genotype modifyAlleles(Genotype g, List<Allele> alleles) {
return new Genotype(g.getSampleName(), alleles, g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
}
@ -258,7 +268,8 @@ public class Genotype {
* @param <V> the value type
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
*/
public static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
private static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
List<T> t = new ArrayList<T>(c.keySet());
Collections.sort(t);

View File

@ -1495,7 +1495,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// Do not change the filter state if filters were not applied to this context
Set<String> inputVCFilters = inputVC.filtersWereAppliedToContext ? inputVC.getFilters() : null;
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes());
return new VariantContext(inputVC.getSource(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(), inputVCFilters, inputVC.getAttributes(),refByte);
}
else
return inputVC;

View File

@ -44,6 +44,11 @@ import java.io.Serializable;
import java.util.*;
public class VariantContextUtils {
public final static String MERGE_INTERSECTION = "Intersection";
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
public final static String MERGE_FILTER_PREFIX = "filterIn";
final public static JexlEngine engine = new JexlEngine();
static {
engine.setSilent(false); // will throw errors now for selects that don't evaluate properly
@ -154,6 +159,13 @@ public class VariantContextUtils {
return "%." + precision + "f";
}
public static Genotype removePLs(Genotype g) {
Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
attrs.remove(VCFConstants.GENOTYPE_LIKELIHOODS_KEY);
return new Genotype(g.getSampleName(), g.getAlleles(), g.getNegLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attrs, g.isPhased());
}
/**
* A simple but common wrapper for matching VariantContext objects using JEXL expressions
*/
@ -594,7 +606,9 @@ public class VariantContextUtils {
// if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate)
for ( VariantContext vc : VCs ) {
if ( vc.alleles.size() != alleles.size() ) {
if (vc.alleles.size() == 1)
continue;
if ( vc.alleles.size() != alleles.size()) {
genotypes = stripPLs(genotypes);
break;
}
@ -609,19 +623,20 @@ public class VariantContextUtils {
if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
filters.clear();
if ( annotateOrigin ) { // we care about where the call came from
String setValue;
if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
setValue = "Intersection";
setValue = MERGE_INTERSECTION;
else if ( nFiltered == VCs.size() ) // everything was filtered out
setValue = "FilteredInAll";
setValue = MERGE_FILTER_IN_ALL;
else if ( variantSources.isEmpty() ) // everyone was reference
setValue = "ReferenceInAll";
setValue = MERGE_REF_IN_ALL;
else {
LinkedHashSet<String> s = new LinkedHashSet<String>();
for ( VariantContext vc : VCs )
if ( vc.isVariant() )
s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() );
s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
setValue = Utils.join("-", s);
}
@ -734,7 +749,7 @@ public class VariantContextUtils {
Map<String, Genotype> newGs = new HashMap<String, Genotype>(genotypes.size());
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) {
newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? Genotype.removePLs(g.getValue()) : g.getValue());
newGs.put(g.getKey(), g.getValue().hasLikelihoods() ? removePLs(g.getValue()) : g.getValue());
}
return newGs;
@ -743,9 +758,46 @@ public class VariantContextUtils {
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
for ( VariantContext vc : VCs ) {
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(vc);
// look at previous variant contexts of different type. If:
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
// c) neither: do nothing, just add vc to its own list
boolean addtoOwnList = true;
for (VariantContext.Type type : VariantContext.Type.values()) {
if (type.equals(vc.getType()))
continue;
if (!mappedVCs.containsKey(type))
continue;
List<VariantContext> vcList = mappedVCs.get(type);
for (int k=0; k < vcList.size(); k++) {
VariantContext otherVC = vcList.get(k);
if (allelesAreSubset(otherVC,vc)) {
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
vcList.remove(k);
// avoid having empty lists
if (vcList.size() == 0)
mappedVCs.remove(vcList);
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(otherVC);
break;
}
else if (allelesAreSubset(vc,otherVC)) {
// vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
mappedVCs.get(type).add(vc);
addtoOwnList = false;
break;
}
}
}
if (addtoOwnList) {
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(vc);
}
}
return mappedVCs;

View File

@ -132,15 +132,21 @@ public abstract class BaseTest {
*/
public static class TestDataProvider {
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
private final String name;
/**
* Create a new TestDataProvider instance bound to the class variable C
* @param c
*/
public TestDataProvider(Class c) {
public TestDataProvider(Class c, String name) {
if ( ! tests.containsKey(c) )
tests.put(c, new ArrayList<Object>());
tests.get(c).add(this);
this.name = name;
}
public TestDataProvider(Class c) {
this(c, "");
}
/**
@ -153,6 +159,11 @@ public abstract class BaseTest {
for ( Object x : tests.get(c) ) params2.add(new Object[]{x});
return params2.toArray(new Object[][]{});
}
@Override
public String toString() {
return "TestDataProvider("+name+")";
}
}
/**

View File

@ -33,7 +33,7 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
1,
Arrays.asList("6645babc8c7d46be0da223477c7b1291"));
Arrays.asList("3008d6f5044bc14801e5c58d985dec72"));
executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec);
}
}

View File

@ -132,9 +132,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
1,
Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e")
Arrays.asList("122321a85e448f21679f6ca15c5e22ad")
);
executeTest("Testing SnpEff annotations", spec);
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.utils.genotype;
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.testng.Assert;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.Test;

View File

@ -42,7 +42,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("ec43daadfb15b00b41aeb0017a45df0b"));
Arrays.asList("6458f3b8fe4954e2ffc2af972aaab19e"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}

View File

@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12")
Arrays.asList("d9dcb352c53106f54fcc981f15d35a90")
);
executeTest("testFunctionClassWithSnpeff", spec);
}

View File

@ -89,8 +89,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "312a22aedb088b678bc891f1a1b03c91"); }
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "96941ee177b0614a9879af0ac3218963"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1c8720fde62687c2e861217670d8b3c"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("35acb0f15f9cd18c653ede4e15e365c9"));
Arrays.asList("b14f8cbb5d03a2e613b12da4da9efd9a"));
executeTest("threeWayWithRefs", spec);
}
@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
1,
Arrays.asList(""));
Arrays.asList("5969446769cb8377daa2db29304ae6b5"));
executeTest("combineDBSNPDuplicateSites:", spec);
}
}

View File

@ -16,7 +16,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant:VCF3 " + testfile),
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
1,
Arrays.asList("d18516c1963802e92cb9e425c0b75fd6")
);
@ -30,7 +30,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String samplesFile = validationDataLocation + "SelectVariants.samples.txt";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s -NO_HEADER -xl_sn A -xl_sf " + samplesFile + " --variant:VCF3 " + testfile,
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s -NO_HEADER -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
1,
Arrays.asList("730f021fd6ecf1d195dabbee2e233bfd")
);
@ -43,7 +43,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String testfile = validationDataLocation + "test.dup.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -sn B -sn C --variant:VCF3 " + testfile),
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
1,
Arrays.asList("b74038779fe6485dbb8734ae48178356")
);
@ -56,7 +56,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant:VCF " + b37hapmapGenotypes + " -disc:VCF " + testFile + " -o %s -NO_HEADER",
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 --variant " + b37hapmapGenotypes + " -disc " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("78e6842325f1f1bc9ab30d5e7737ee6e")
);
@ -69,7 +69,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
String testFile = validationDataLocation + "NA12878.hg19.example1.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc:VCF " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER",
"-T SelectVariants -R " + hg19Reference + " -sn NA12878 -L 20:1012700-1020000 -conc " + b37hapmapGenotypes + " --variant " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("d2ba3ea30a810f6f0fbfb1b643292b6a")
);
@ -90,16 +90,16 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
executeTest("testVariantTypeSelection--" + testFile, spec);
}
@Test(enabled=false)
public void testRemovePLs() {
@Test
public void testUsingDbsnpName() {
String testFile = validationDataLocation + "combine.3.vcf";
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant " + testFile + " -o %s -NO_HEADER",
"-T SelectVariants -R " + b36KGReference + " -sn NA12892 --variant:dbsnp " + testFile + " -o %s -NO_HEADER",
1,
Arrays.asList("")
Arrays.asList("167a1265df820978a74c267df44d5c43")
);
executeTest("testWithPLs--" + testFile, spec);
executeTest("testUsingDbsnpName--" + testFile, spec);
}
}

View File

@ -34,6 +34,7 @@ import java.io.File;
import java.util.*;
public class JnaSessionIntegrationTest extends BaseTest {
private String implementation = null;
private static final SessionFactory factory = new JnaSessionFactory();
@Test
@ -44,10 +45,23 @@ public class JnaSessionIntegrationTest extends BaseTest {
System.out.println(String.format("DRMAA contact(s): %s", session.getContact()));
System.out.println(String.format("DRM system(s): %s", session.getDrmSystem()));
System.out.println(String.format("DRMAA implementation(s): %s", session.getDrmaaImplementation()));
this.implementation = session.getDrmaaImplementation();
}
@Test
@Test(dependsOnMethods = { "testDrmaa" })
public void testSubmitEcho() throws Exception {
if (implementation.contains("LSF")) {
System.err.println(" ***********************************************************");
System.err.println(" *************************************************************");
System.err.println(" **** ****");
System.err.println(" **** Skipping JnaSessionIntegrationTest.testSubmitEcho() ****");
System.err.println(" **** Are you using the dotkit .combined_LSF_SGE? ****");
System.err.println(" **** ****");
System.err.println(" *************************************************************");
System.err.println(" ***********************************************************");
return;
}
File outFile = createNetworkTempFile("JnaSessionIntegrationTest-", ".out");
Session session = factory.getSession();
session.init(null);

View File

@ -105,7 +105,6 @@ public class VCFWriterUnitTest extends BaseTest {
public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
metaData.add(new VCFHeaderLine("two", "2"));
additionalColumns.add("FORMAT");
additionalColumns.add("extra1");
additionalColumns.add("extra2");
return new VCFHeader(metaData, additionalColumns);
@ -159,6 +158,6 @@ public class VCFWriterUnitTest extends BaseTest {
Assert.assertTrue(additionalColumns.contains(key));
index++;
}
Assert.assertEquals(index+1, additionalColumns.size() /* for the header field we don't see */);
Assert.assertEquals(index, additionalColumns.size());
}
}

View File

@ -0,0 +1,84 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
// our package
package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.List;
public class GenotypeUnitTest extends BaseTest {
Allele A, Aref, T;
@BeforeSuite
public void before() {
A = Allele.create("A");
Aref = Allele.create("A", true);
T = Allele.create("T");
}
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased, double[] log10Likelihoods) {
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, double[] log10Likelihoods)
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError)
// public Genotype(String sampleName, List<Allele> alleles)
// public List<Allele> getAlleles()
// public List<Allele> getAlleles(Allele allele)
// public Allele getAllele(int i)
// public boolean isPhased()
// public int getPloidy()
// public Type getType()
// public boolean isHom()
// public boolean isHomRef()
// public boolean isHomVar()
// public boolean isHet()
// public boolean isNoCall()
// public boolean isCalled()
// public boolean isAvailable()
// public boolean hasLikelihoods()
// public GenotypeLikelihoods getLikelihoods()
// public boolean sameGenotype(Genotype other)
// public boolean sameGenotype(Genotype other, boolean ignorePhase)
// public String getSampleName()
// public boolean hasNegLog10PError()
// public double getNegLog10PError()
// public double getPhredScaledQual()
// public boolean hasAttribute(String key)
// public Object getAttribute(String key)
// public Object getAttribute(String key, Object defaultValue)
// public String getAttributeAsString(String key, String defaultValue)
// public int getAttributeAsInt(String key, int defaultValue)
// public double getAttributeAsDouble(String key, double defaultValue)
// public boolean getAttributeAsBoolean(String key, boolean defaultValue)
}

View File

@ -5,6 +5,7 @@ package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.BeforeTest;
@ -14,10 +15,7 @@ import java.util.Arrays;
import java.util.List;
/**
* Basic unit test for RecalData
*/
public class VariantContextUnitTest {
public class VariantContextUnitTest extends BaseTest {
Allele A, Aref, T, Tref;
Allele del, delRef, ATC, ATCref;

View File

@ -0,0 +1,562 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
public class VariantContextUtilsUnitTest extends BaseTest {
Allele Aref, T, C, delRef, Cref, ATC, ATCATC;
private GenomeLocParser genomeLocParser;
@BeforeSuite
public void setup() {
final File referenceFile = new File(b37KGReference);
try {
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
genomeLocParser = new GenomeLocParser(seq);
}
catch(FileNotFoundException ex) {
throw new UserException.CouldNotReadInputFile(referenceFile,ex);
}
// alleles
Aref = Allele.create("A", true);
Cref = Allele.create("C", true);
delRef = Allele.create("-", true);
T = Allele.create("T");
C = Allele.create("C");
ATC = Allele.create("ATC");
ATCATC = Allele.create("ATCATC");
}
private Genotype makeG(String sample, Allele a1, Allele a2) {
return new Genotype(sample, Arrays.asList(a1, a2));
}
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double l1, double l2, double l3) {
return new Genotype(sample, Arrays.asList(a1, a2), log10pError, new double[]{l1,l2,l3});
}
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) {
return new Genotype(sample, Arrays.asList(a1, a2), log10pError);
}
private VariantContext makeVC(String source, List<Allele> alleles) {
return makeVC(source, alleles, null, null);
}
private VariantContext makeVC(String source, List<Allele> alleles, Genotype... g1) {
return makeVC(source, alleles, Arrays.asList(g1));
}
private VariantContext makeVC(String source, List<Allele> alleles, String filter) {
return makeVC(source, alleles, filter.equals(".") ? null : new HashSet<String>(Arrays.asList(filter)));
}
private VariantContext makeVC(String source, List<Allele> alleles, Set<String> filters) {
return makeVC(source, alleles, null, filters);
}
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes) {
return makeVC(source, alleles, genotypes, null);
}
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
int start = 10;
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
return new VariantContext(source, "1", start, stop, alleles,
genotypes == null ? null : VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes),
1.0, filters, null, Cref.getBases()[0]);
}
// --------------------------------------------------------------------------------
//
// Test allele merging
//
// --------------------------------------------------------------------------------
private class MergeAllelesTest extends TestDataProvider {
List<List<Allele>> inputs;
List<Allele> expected;
private MergeAllelesTest(List<Allele>... arg) {
super(MergeAllelesTest.class);
LinkedList<List<Allele>> all = new LinkedList<List<Allele>>(Arrays.asList(arg));
expected = all.pollLast();
inputs = all;
}
public String toString() {
return String.format("MergeAllelesTest input=%s expected=%s", inputs, expected);
}
}
@DataProvider(name = "mergeAlleles")
public Object[][] mergeAllelesData() {
// first, do no harm
new MergeAllelesTest(Arrays.asList(Aref),
Arrays.asList(Aref));
new MergeAllelesTest(Arrays.asList(Aref),
Arrays.asList(Aref),
Arrays.asList(Aref));
new MergeAllelesTest(Arrays.asList(Aref),
Arrays.asList(Aref, T),
Arrays.asList(Aref, T));
new MergeAllelesTest(Arrays.asList(Aref, C),
Arrays.asList(Aref, T),
Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
new MergeAllelesTest(Arrays.asList(Aref, C, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, T, C),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
// The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant.
// The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference.
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(Cref));
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATC));
new MergeAllelesTest(Arrays.asList(delRef),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
new MergeAllelesTest(Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
return MergeAllelesTest.getTests(MergeAllelesTest.class);
}
@Test(dataProvider = "mergeAlleles")
public void testMergeAlleles(MergeAllelesTest cfg) {
final List<VariantContext> inputs = new ArrayList<VariantContext>();
int i = 0;
for ( final List<Allele> alleles : cfg.inputs ) {
final String name = "vcf" + ++i;
inputs.add(makeVC(name, alleles));
}
final List<String> priority = vcs2priority(inputs);
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
inputs, priority,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, "set", false, false);
Assert.assertEquals(merged.getAlleles(), cfg.expected);
}
// --------------------------------------------------------------------------------
//
// Test rsID merging
//
// --------------------------------------------------------------------------------
private class SimpleMergeRSIDTest extends TestDataProvider {
List<String> inputs;
String expected;
private SimpleMergeRSIDTest(String... arg) {
super(SimpleMergeRSIDTest.class);
LinkedList<String> allStrings = new LinkedList<String>(Arrays.asList(arg));
expected = allStrings.pollLast();
inputs = allStrings;
}
public String toString() {
return String.format("SimpleMergeRSIDTest vc=%s expected=%s", inputs, expected);
}
}
@DataProvider(name = "simplemergersiddata")
public Object[][] createSimpleMergeRSIDData() {
new SimpleMergeRSIDTest(".", ".");
new SimpleMergeRSIDTest(".", ".", ".");
new SimpleMergeRSIDTest("rs1", "rs1");
new SimpleMergeRSIDTest("rs1", "rs1", "rs1");
new SimpleMergeRSIDTest(".", "rs1", "rs1");
new SimpleMergeRSIDTest("rs1", ".", "rs1");
new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2");
new SimpleMergeRSIDTest("rs1", "rs2", "rs1", "rs1,rs2"); // duplicates
new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1");
new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1");
new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1");
new SimpleMergeRSIDTest("rs1", ".", ".", "rs1");
new SimpleMergeRSIDTest("rs1", "rs2", "rs3", "rs1,rs2,rs3");
return SimpleMergeRSIDTest.getTests(SimpleMergeRSIDTest.class);
}
@Test(dataProvider = "simplemergersiddata")
public void testRSIDMerge(SimpleMergeRSIDTest cfg) {
final VariantContext snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T));
final List<VariantContext> inputs = new ArrayList<VariantContext>();
for ( final String id : cfg.inputs ) {
MutableVariantContext vc = new MutableVariantContext(snpVC1);
if ( ! id.equals(".") ) vc.setID(id);
inputs.add(vc);
}
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
inputs, null,
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, "set", false, false);
Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected);
}
// --------------------------------------------------------------------------------
//
// Test filtered merging
//
// --------------------------------------------------------------------------------
private class MergeFilteredTest extends TestDataProvider {
List<VariantContext> inputs;
VariantContext expected;
String setExpected;
VariantContextUtils.FilteredRecordMergeType type;
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, String setExpected) {
this(name, input1, input2, expected, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, setExpected);
}
private MergeFilteredTest(String name, VariantContext input1, VariantContext input2, VariantContext expected, VariantContextUtils.FilteredRecordMergeType type, String setExpected) {
super(MergeFilteredTest.class, name);
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(input1, input2));
this.expected = expected;
this.type = type;
inputs = all;
this.setExpected = setExpected;
}
public String toString() {
return String.format("%s input=%s expected=%s", super.toString(), inputs, expected);
}
}
@DataProvider(name = "mergeFiltered")
public Object[][] mergeFilteredData() {
new MergeFilteredTest("AllPass",
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("noFilters",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("oneFiltered",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("onePassOneFail",
makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("AllFiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.MERGE_FILTER_IN_ALL);
// test ALL vs. ANY
new MergeFilteredTest("FailOneUnfiltered",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "."),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
new MergeFilteredTest("OneFailAllUnfilteredArg",
makeVC("1", Arrays.asList(Aref, T), "FAIL"),
makeVC("2", Arrays.asList(Aref, T), "."),
makeVC("3", Arrays.asList(Aref, T), "FAIL"),
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ALL_UNFILTERED,
String.format("%s1-2", VariantContextUtils.MERGE_FILTER_PREFIX));
// test excluding allele in filtered record
new MergeFilteredTest("DontIncludeAlleleOfFilteredRecords",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), "FAIL"),
makeVC("3", Arrays.asList(Aref, T), "."),
String.format("1-%s2", VariantContextUtils.MERGE_FILTER_PREFIX));
// promotion of site from unfiltered to PASSES
new MergeFilteredTest("UnfilteredPlusPassIsPass",
makeVC("1", Arrays.asList(Aref, T), "."),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_INTERSECTION);
new MergeFilteredTest("RefInAll",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
VariantContextUtils.MERGE_REF_IN_ALL);
new MergeFilteredTest("RefInOne",
makeVC("1", Arrays.asList(Aref), VariantContext.PASSES_FILTERS),
makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
makeVC("3", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS),
"2");
return MergeFilteredTest.getTests(MergeFilteredTest.class);
}
@Test(dataProvider = "mergeFiltered")
public void testMergeFiltered(MergeFilteredTest cfg) {
final List<String> priority = vcs2priority(cfg.inputs);
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
cfg.inputs, priority, cfg.type, VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
// test alleles are equal
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
// test set field
Assert.assertEquals(merged.getAttribute("set"), cfg.setExpected);
// test filter field
Assert.assertEquals(merged.getFilters(), cfg.expected.getFilters());
}
// --------------------------------------------------------------------------------
//
// Test genotype merging
//
// --------------------------------------------------------------------------------
private class MergeGenotypesTest extends TestDataProvider {
List<VariantContext> inputs;
VariantContext expected;
List<String> priority;
private MergeGenotypesTest(String name, String priority, VariantContext... arg) {
super(MergeGenotypesTest.class, name);
LinkedList<VariantContext> all = new LinkedList<VariantContext>(Arrays.asList(arg));
this.expected = all.pollLast();
inputs = all;
this.priority = Arrays.asList(priority.split(","));
}
public String toString() {
return String.format("%s input=%s expected=%s", super.toString(), inputs, expected);
}
}
@DataProvider(name = "mergeGenotypes")
public Object[][] mergeGenotypesData() {
new MergeGenotypesTest("TakeGenotypeByPriority-1,2", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)));
new MergeGenotypesTest("TakeGenotypeByPriority-1,2-nocall", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)));
new MergeGenotypesTest("TakeGenotypeByPriority-2,1", "2,1",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2)));
new MergeGenotypesTest("NonOverlappingGenotypes", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, T, 2)));
new MergeGenotypesTest("PreserveNoCall", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s2", Aref, T, 2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Allele.NO_CALL, Allele.NO_CALL, 1), makeG("s2", Aref, T, 2)));
new MergeGenotypesTest("PerserveAlleles", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)),
makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2)));
new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1), makeG("s3", Aref, T, 3)));
new MergeGenotypesTest("TakeGenotypePartialOverlap-2,1", "2,1",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)));
// merging genothpes with PLs
new MergeGenotypesTest("TakeGenotypePartialOverlapWithPLs-2,1", "2,1",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1,5,0,3)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)));
new MergeGenotypesTest("TakeGenotypePartialOverlapWithPLs-1,2", "1,2",
makeVC("1", Arrays.asList(Aref,ATC), makeG("s1", Aref, ATC, 1,5,0,3)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)),
// no likelihoods on result since type changes to mixed multiallelic
makeVC("3", Arrays.asList(Aref, ATC, T), makeG("s1", Aref, ATC, 1), makeG("s3", Aref, T, 3)));
return MergeGenotypesTest.getTests(MergeGenotypesTest.class);
}
@Test(dataProvider = "mergeGenotypes")
public void testMergeGenotypes(MergeGenotypesTest cfg) {
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
cfg.inputs, cfg.priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
// test alleles are equal
Assert.assertEquals(merged.getAlleles(), cfg.expected.getAlleles());
// test genotypes
assertGenotypesAreMostlyEqual(merged.getGenotypes(), cfg.expected.getGenotypes());
}
// necessary to not overload equals for genotypes
private void assertGenotypesAreMostlyEqual(Map<String, Genotype> actual, Map<String, Genotype> expected) {
if (actual == expected) {
return;
}
if (actual == null || expected == null) {
Assert.fail("Maps not equal: expected: " + expected + " and actual: " + actual);
}
if (actual.size() != expected.size()) {
Assert.fail("Maps do not have the same size:" + actual.size() + " != " + expected.size());
}
for (Map.Entry<String, Genotype> entry : actual.entrySet()) {
String key = entry.getKey();
Genotype value = entry.getValue();
Genotype expectedValue = expected.get(key);
Assert.assertEquals(value.alleles, expectedValue.alleles);
Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError());
Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods());
if ( value.hasLikelihoods() )
Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector());
}
}
@Test
public void testMergeGenotypesUniquify() {
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.UNIQUIFY, false, false, "set", false, false);
// test genotypes
Assert.assertEquals(merged.getGenotypes().keySet(), new HashSet<String>(Arrays.asList("s1.1", "s1.2")));
}
@Test(expectedExceptions = UserException.class)
public void testMergeGenotypesRequireUnique() {
final VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1));
final VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2));
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
Arrays.asList(vc1, vc2), null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE, false, false, "set", false, false);
}
// --------------------------------------------------------------------------------
//
// Misc. tests
//
// --------------------------------------------------------------------------------
@Test
public void testAnnotationSet() {
for ( final boolean annotate : Arrays.asList(true, false)) {
for ( final String set : Arrays.asList("set", "combine", "x")) {
final List<String> priority = Arrays.asList("1", "2");
VariantContext vc1 = makeVC("1", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
VariantContext vc2 = makeVC("2", Arrays.asList(Aref, T), VariantContext.PASSES_FILTERS);
final VariantContext merged = VariantContextUtils.simpleMerge(genomeLocParser,
Arrays.asList(vc1, vc2), priority, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
VariantContextUtils.GenotypeMergeType.PRIORITIZE, annotate, false, set, false, false);
if ( annotate )
Assert.assertEquals(merged.getAttribute(set), VariantContextUtils.MERGE_INTERSECTION);
else
Assert.assertFalse(merged.hasAttribute(set));
}
}
}
private static final List<String> vcs2priority(final Collection<VariantContext> vcs) {
final List<String> priority = new ArrayList<String>();
for ( final VariantContext vc : vcs ) {
priority.add(vc.getSource());
}
return priority;
}
}

View File

@ -6,7 +6,7 @@
<dependencies>
<!-- Recalibration analysis script -->
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.*" />
<package name="org.broadinstitute.sting.gatk.walkers.recalibration" />
</dependencies>
</executable>
<resources>

View File

@ -29,7 +29,7 @@
<!-- Core walkers -->
<package name="org.broadinstitute.sting.gatk.walkers.**" />
<!-- All non-oneoff GATK-specific RODs -->
<package name="org.broadinstitute.sting.gatk.refdata.**" />
<package name="org.broadinstitute.sting.utils.codecs.**" />
<!-- Filters -->
<package name="org.broadinstitute.sting.gatk.filters" />
<!-- Tribble codecs -->

View File

@ -221,7 +221,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
this.max_deletion_fraction = qscript.deletions
this.out = t.rawVCF
this.glm = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
this.baq = if (noBAQ) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY}
this.baq = if (noBAQ || t.isExome) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY}
this.analysisName = t.name + "_UGs"
this.jobName = queueLogDir + t.name + ".snpcall"
}
@ -266,13 +266,18 @@ class MethodsDevelopmentCallingPipeline extends QScript {
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
if(t.nSamples >= 10) {
if(t.nSamples >= 10) { // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
this.use_annotation ++= List("InbreedingCoeff")
}
if(!t.isExome) {
this.use_annotation ++= List("DP")
} else {
} else { // exome specific parameters
this.resource :+= new TaggedFile( badSites_1000G, "bad=true,prior=2.0" )
this.mG = 6
if(t.nSamples <= 3) { // very few exome samples means very few variants
this.mG = 4
this.percentBad = 0.04
}
}
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile }
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }