Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
b279ae4ead
|
|
@ -155,7 +155,7 @@ public class FeatureManager {
|
|||
public FeatureDescriptor getByFiletype(File file) {
|
||||
List<FeatureDescriptor> canParse = new ArrayList<FeatureDescriptor>();
|
||||
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||
if ( descriptor.getCodec().canDecode(file) ) {
|
||||
if ( descriptor.getCodec().canDecode(file.getPath()) ) {
|
||||
canParse.add(descriptor);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,102 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 11/14/11
|
||||
*/
|
||||
|
||||
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
private Set<MendelianViolation> fullMVSet = null;
|
||||
private final static int REF = 0;
|
||||
private final static int HET = 1;
|
||||
private final static int HOM = 2;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( fullMVSet == null ) {
|
||||
fullMVSet = new HashSet<MendelianViolation>();
|
||||
|
||||
if ( walker instanceof VariantAnnotator ) {
|
||||
final Map<String,Set<Sample>> families = ((VariantAnnotator) walker).getSampleDB().getFamilies();
|
||||
for( final Set<Sample> family : families.values() ) {
|
||||
for( final Sample sample : family ) {
|
||||
if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now
|
||||
fullMVSet.add( new MendelianViolation(sample, 0.0) );
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in.");
|
||||
}
|
||||
}
|
||||
|
||||
final Map<String,Object> toRet = new HashMap<String,Object>(1);
|
||||
final HashSet<MendelianViolation> mvsToTest = new HashSet<MendelianViolation>();
|
||||
|
||||
for( final MendelianViolation mv : fullMVSet ) {
|
||||
final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() &&
|
||||
vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() &&
|
||||
vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods();
|
||||
if ( hasAppropriateGenotypes ) {
|
||||
mvsToTest.add(mv);
|
||||
}
|
||||
}
|
||||
|
||||
toRet.put("TDT", calculateTDT( vc, mvsToTest ));
|
||||
|
||||
return toRet;
|
||||
}
|
||||
|
||||
// return the descriptions used for the VCF INFO meta field
|
||||
public List<String> getKeyNames() { return Arrays.asList("TDT"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); }
|
||||
|
||||
// Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
|
||||
private double calculateTDT( final VariantContext vc, final Set<MendelianViolation> mvsToTest ) {
|
||||
|
||||
final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM);
|
||||
final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM);
|
||||
final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET);
|
||||
final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET);
|
||||
final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET);
|
||||
final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET);
|
||||
|
||||
final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB);
|
||||
final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB);
|
||||
return (numer * numer) / denom;
|
||||
}
|
||||
|
||||
private double calculateNChildren( final VariantContext vc, final Set<MendelianViolation> mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) {
|
||||
final double likelihoodVector[] = new double[mvsToTest.size() * 2];
|
||||
int iii = 0;
|
||||
for( final MendelianViolation mv : mvsToTest ) {
|
||||
final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector();
|
||||
final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector();
|
||||
final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector();
|
||||
likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx];
|
||||
likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx];
|
||||
}
|
||||
|
||||
return MathUtils.sumLog10(likelihoodVector);
|
||||
}
|
||||
}
|
||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
@ -196,6 +197,11 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
System.exit(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SampleDB getSampleDB() {
|
||||
return super.getSampleDB();
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepare the output file and the list of available features.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -135,6 +134,6 @@ public class VCFDiffableReader implements DiffableReader {
|
|||
|
||||
@Override
|
||||
public boolean canRead(File file) {
|
||||
return AbstractVCFCodec.canDecodeFile(file, VCFCodec.VCF4_MAGIC_HEADER);
|
||||
return AbstractVCFCodec.canDecodeFile(file.getPath(), VCFCodec.VCF4_MAGIC_HEADER);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,11 +50,12 @@ public class UnifiedGenotyperEngine {
|
|||
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
||||
|
||||
public enum OUTPUT_MODE {
|
||||
/** the default */
|
||||
/** produces calls only at variant sites */
|
||||
EMIT_VARIANTS_ONLY,
|
||||
/** include confident reference sites */
|
||||
/** produces calls at variant sites and confident reference sites */
|
||||
EMIT_ALL_CONFIDENT_SITES,
|
||||
/** any callable site regardless of confidence */
|
||||
/** produces calls at any callable site regardless of confidence; this argument is intended for point
|
||||
* mutations (SNPs) only and while some indel calls may be produced they are by no means comprehensive */
|
||||
EMIT_ALL_SITES
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -58,8 +58,7 @@ import java.util.*;
|
|||
* slightly lower quality level.
|
||||
*
|
||||
* <p>
|
||||
* See the GATK wiki for a tutorial and example recalibration accuracy plots.
|
||||
* http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration
|
||||
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
|
|
@ -78,7 +77,7 @@ import java.util.*;
|
|||
* java -Xmx3g -jar GenomeAnalysisTK.jar \
|
||||
* -T ApplyRecalibration \
|
||||
* -R reference/human_g1k_v37.fasta \
|
||||
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf \
|
||||
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
|
||||
* --ts_filter_level 99.0 \
|
||||
* -tranchesFile path/to/output.tranches \
|
||||
* -recalFile path/to/output.recal \
|
||||
|
|
|
|||
|
|
@ -66,12 +66,14 @@ import java.util.*;
|
|||
* the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: Please see our <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Best_Practice_Variant_Detection_with_the_GATK_v3">best practices wiki page</a> for our recommendations on which annotations to use for specific project designs.
|
||||
*
|
||||
* <p>
|
||||
* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
|
||||
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
|
||||
*
|
||||
* <p>
|
||||
* See the GATK wiki for a tutorial and example recalibration accuracy plots.
|
||||
* http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration
|
||||
* See <a href="http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration">the GATK wiki for a tutorial and example recalibration accuracy plots.</a>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
|
|
@ -83,18 +85,18 @@ import java.util.*;
|
|||
* <p>
|
||||
* A recalibration table file in CSV format that is used by the ApplyRecalibration walker.
|
||||
* <p>
|
||||
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
|
||||
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
|
||||
*
|
||||
* <h2>Example</h2>
|
||||
* <pre>
|
||||
* java -Xmx4g -jar GenomeAnalysisTK.jar \
|
||||
* -T VariantRecalibrator \
|
||||
* -R reference/human_g1k_v37.fasta \
|
||||
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf \
|
||||
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
|
||||
* -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
|
||||
* -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
|
||||
* -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
|
||||
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ \
|
||||
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
|
||||
* -recalFile path/to/output.recal \
|
||||
* -tranchesFile path/to/output.tranches \
|
||||
* -rscriptFile path/to/output.plots.R
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class MendelianViolation {
|
|||
/**
|
||||
* An alternative to the more general constructor if you want to get the Sample information from the engine yourself.
|
||||
* @param sample - the sample object extracted from the sample metadata YAML file given to the engine.
|
||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
|
||||
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
|
||||
*/
|
||||
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
|
||||
sampleMom = sample.getMother().getID();
|
||||
|
|
|
|||
|
|
@ -249,6 +249,6 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature
|
|||
return bglFeature;
|
||||
}
|
||||
|
||||
public boolean canDecode(final File potentialInput) { return false; }
|
||||
public boolean canDecode(final String potentialInput) { return false; }
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
|
|
@ -140,6 +139,6 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
|
|||
return RefSeqFeature.class;
|
||||
}
|
||||
|
||||
public boolean canDecode(final File potentialInput) { return false; }
|
||||
public boolean canDecode(final String potentialInput) { return false; }
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -109,6 +108,6 @@ public class TableCodec implements ReferenceDependentFeatureCodec {
|
|||
return header;
|
||||
}
|
||||
|
||||
public boolean canDecode(final File potentialInput) { return false; }
|
||||
public boolean canDecode(final String potentialInput) { return false; }
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -638,7 +638,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
|
|||
return position+Math.max(refLength - 1,0);
|
||||
}
|
||||
|
||||
public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
|
||||
public final static boolean canDecodeFile(final String potentialInput, final String MAGIC_HEADER_LINE) {
|
||||
try {
|
||||
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
|
||||
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) ||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ import org.broad.tribble.readers.LineReader;
|
|||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -193,7 +192,7 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean canDecode(final File potentialInput) {
|
||||
public boolean canDecode(final String potentialInput) {
|
||||
return canDecodeFile(potentialInput, VCF3_MAGIC_HEADER);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@ import org.broad.tribble.readers.LineReader;
|
|||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -217,7 +216,7 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean canDecode(final File potentialInput) {
|
||||
public boolean canDecode(final String potentialInput) {
|
||||
return canDecodeFile(potentialInput, VCF4_MAGIC_HEADER);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -168,4 +168,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
);
|
||||
executeTest("Testing SnpEff annotations (unsupported version)", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTDTAnnotation() {
|
||||
final String MD5 = "9fe37b61aab695ad47ce3c587148e91f";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,
|
||||
Arrays.asList(MD5));
|
||||
executeTest("Testing TDT annotation", spec);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="41" status="integration" />
|
||||
<info organisation="org.broad" module="tribble" revision="42" status="integration" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue