diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index c41444ef3..fcd85fd1d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -155,7 +155,7 @@ public class FeatureManager { public FeatureDescriptor getByFiletype(File file) { List canParse = new ArrayList(); for ( FeatureDescriptor descriptor : featureDescriptors ) - if ( descriptor.getCodec().canDecode(file) ) { + if ( descriptor.getCodec().canDecode(file.getPath()) ) { canParse.add(descriptor); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java new file mode 100644 index 000000000..3de179365 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -0,0 +1,102 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.samples.Sample; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.MendelianViolation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.FileNotFoundException; +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: 11/14/11 + */ + +public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation { + + private Set fullMVSet = null; + private final static int REF = 0; + private final static int HET = 1; + private final static int HOM = 2; + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( fullMVSet == null ) { + fullMVSet = new HashSet(); + + if ( walker instanceof VariantAnnotator ) { + final Map> families = ((VariantAnnotator) walker).getSampleDB().getFamilies(); + for( final Set family : families.values() ) { + for( final Sample sample : family ) { + if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now + fullMVSet.add( new MendelianViolation(sample, 0.0) ); + } + } + } + } else { + throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in."); + } + } + + final Map toRet = new HashMap(1); + final HashSet mvsToTest = new HashSet(); + + for( final MendelianViolation mv : fullMVSet ) { + final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() && + vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() && + vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods(); + if ( hasAppropriateGenotypes ) { + mvsToTest.add(mv); + } + } + + toRet.put("TDT", calculateTDT( vc, mvsToTest )); + + return toRet; + } + + // return the descriptions used for the VCF INFO meta field + public List getKeyNames() { return Arrays.asList("TDT"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); } + + // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT + private double calculateTDT( final VariantContext vc, final Set mvsToTest ) { + + final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM); + final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM); + final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET); + final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET); + final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET); + final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET); + + final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); + final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); + return (numer * numer) / denom; + } + + private double calculateNChildren( final VariantContext vc, final Set mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) { + final double likelihoodVector[] = new double[mvsToTest.size() * 2]; + int iii = 0; + for( final MendelianViolation mv : mvsToTest ) { + final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector(); + final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector(); + final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector(); + likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; + likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; + } + + return MathUtils.sumLog10(likelihoodVector); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index c9ea7a3b5..143f2eb2e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.BaseUtils; @@ -196,6 +197,11 @@ public class VariantAnnotator extends RodWalker implements Ann System.exit(0); } + @Override + public SampleDB getSampleDB() { + return super.getSampleDB(); + } + /** * Prepare the output file and the list of available features. */ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index efa57c0aa..3c0da8e9d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -33,7 +33,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileInputStream; -import java.io.FileReader; import java.io.IOException; import java.util.Map; @@ -135,6 +134,6 @@ public class VCFDiffableReader implements DiffableReader { @Override public boolean canRead(File file) { - return AbstractVCFCodec.canDecodeFile(file, VCFCodec.VCF4_MAGIC_HEADER); + return AbstractVCFCodec.canDecodeFile(file.getPath(), VCFCodec.VCF4_MAGIC_HEADER); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index c38bb5b42..c861af1a2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -50,11 +50,12 @@ public class UnifiedGenotyperEngine { public static final String LOW_QUAL_FILTER_NAME = "LowQual"; public enum OUTPUT_MODE { - /** the default */ + /** produces calls only at variant sites */ EMIT_VARIANTS_ONLY, - /** include confident reference sites */ + /** produces calls at variant sites and confident reference sites */ EMIT_ALL_CONFIDENT_SITES, - /** any callable site regardless of confidence */ + /** produces calls at any callable site regardless of confidence; this argument is intended for point + * mutations (SNPs) only and while some indel calls may be produced they are by no means comprehensive */ EMIT_ALL_SITES } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index b1b8fa46d..fac5b22aa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -58,8 +58,7 @@ import java.util.*; * slightly lower quality level. * *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration + * See the GATK wiki for a tutorial and example recalibration accuracy plots. * *

Input

*

@@ -78,7 +77,7 @@ import java.util.*; * java -Xmx3g -jar GenomeAnalysisTK.jar \ * -T ApplyRecalibration \ * -R reference/human_g1k_v37.fasta \ - * -input NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf \ + * -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \ * --ts_filter_level 99.0 \ * -tranchesFile path/to/output.tranches \ * -recalFile path/to/output.recal \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 20b000978..339b8e0e6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -66,12 +66,14 @@ import java.util.*; * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model. * *

+ * NOTE: Please see our best practices wiki page for our recommendations on which annotations to use for specific project designs. + * + *

* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version). * See http://www.r-project.org for more info on how to download and install R. * *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration + * See the GATK wiki for a tutorial and example recalibration accuracy plots. * *

Input

*

@@ -83,18 +85,18 @@ import java.util.*; *

* A recalibration table file in CSV format that is used by the ApplyRecalibration walker. *

- * A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data. + * A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data. * *

Example

*
  * java -Xmx4g -jar GenomeAnalysisTK.jar \
  *   -T VariantRecalibrator \
  *   -R reference/human_g1k_v37.fasta \
- *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf \
+ *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
  *   -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
  *   -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
  *   -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
- *   -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ \
+ *   -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
  *   -recalFile path/to/output.recal \
  *   -tranchesFile path/to/output.tranches \
  *   -rscriptFile path/to/output.plots.R
diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java
index cf45dab79..d0579fc25 100755
--- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java
@@ -77,7 +77,7 @@ public class MendelianViolation {
     /**
      * An alternative to the more general constructor if you want to get the Sample information from the engine yourself.
      * @param sample - the sample object extracted from the sample metadata YAML file given to the engine.
-     * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation
+     * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
      */
     public MendelianViolation(Sample sample, double minGenotypeQualityP) {
         sampleMom = sample.getMother().getID();
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java
index 90d305d73..e4768fd5b 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java
@@ -249,6 +249,6 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec
-    
+