From 436b4dc855942cb1049af78d81be3fc2dce475ee Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 28 Nov 2011 08:59:48 -0500 Subject: [PATCH 1/5] Updated docs --- .../gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index c38bb5b42..c861af1a2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -50,11 +50,12 @@ public class UnifiedGenotyperEngine { public static final String LOW_QUAL_FILTER_NAME = "LowQual"; public enum OUTPUT_MODE { - /** the default */ + /** produces calls only at variant sites */ EMIT_VARIANTS_ONLY, - /** include confident reference sites */ + /** produces calls at variant sites and confident reference sites */ EMIT_ALL_CONFIDENT_SITES, - /** any callable site regardless of confidence */ + /** produces calls at any callable site regardless of confidence; this argument is intended for point + * mutations (SNPs) only and while some indel calls may be produced they are by no means comprehensive */ EMIT_ALL_SITES } From d7d8b8e38030bef5518055734f8992805cb28bdf Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 28 Nov 2011 14:18:28 -0500 Subject: [PATCH 2/5] Tribble v42 changes the Codec.canDecode method to take in a String instead of a File; this is something that Jim was adamant about (because Tribble can handle streams other than files). I didn't want the next person who needed to rev Tribble to deal with this change additionally, so I took care of updating the GATK now. --- .../gatk/refdata/tracks/FeatureManager.java | 2 +- .../walkers/diffengine/VCFDiffableReader.java | 3 +-- .../utils/codecs/beagle/BeagleCodec.java | 2 +- .../utils/codecs/refseq/RefSeqCodec.java | 3 +-- .../sting/utils/codecs/table/TableCodec.java | 3 +-- .../utils/codecs/vcf/AbstractVCFCodec.java | 2 +- .../sting/utils/codecs/vcf/VCF3Codec.java | 3 +-- .../sting/utils/codecs/vcf/VCFCodec.java | 3 +-- .../{tribble-41.jar => tribble-42.jar} | Bin 301217 -> 301131 bytes .../{tribble-41.xml => tribble-42.xml} | 2 +- 10 files changed, 9 insertions(+), 14 deletions(-) rename settings/repository/org.broad/{tribble-41.jar => tribble-42.jar} (93%) rename settings/repository/org.broad/{tribble-41.xml => tribble-42.xml} (51%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index c41444ef3..fcd85fd1d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -155,7 +155,7 @@ public class FeatureManager { public FeatureDescriptor getByFiletype(File file) { List canParse = new ArrayList(); for ( FeatureDescriptor descriptor : featureDescriptors ) - if ( descriptor.getCodec().canDecode(file) ) { + if ( descriptor.getCodec().canDecode(file.getPath()) ) { canParse.add(descriptor); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java index efa57c0aa..3c0da8e9d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java @@ -33,7 +33,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.File; import java.io.FileInputStream; -import java.io.FileReader; import java.io.IOException; import java.util.Map; @@ -135,6 +134,6 @@ public class VCFDiffableReader implements DiffableReader { @Override public boolean canRead(File file) { - return AbstractVCFCodec.canDecodeFile(file, VCFCodec.VCF4_MAGIC_HEADER); + return AbstractVCFCodec.canDecodeFile(file.getPath(), VCFCodec.VCF4_MAGIC_HEADER); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java index 90d305d73..e4768fd5b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/beagle/BeagleCodec.java @@ -249,6 +249,6 @@ public class BeagleCodec implements ReferenceDependentFeatureCodecbm43(phUw?d6sl+Gc5)nYE>DYNlnH)PCoj%fRSA3%~E)GvE9B&UerG&S~E5 zdt{fd**;z&CIBPA$?RwA%>$HA@O9rt{yG`NUpKeDIwVG&eZokPN^eT022| z(xfb6)vBel+MJF6dgVnWu&qhnCvl;fq=O3$wS_!xMgaz4Oi1%?HBj#&$*rx*%_cEP zEy;6?_J{ixCfByrP|6jw4e%JLaLfX=hrpDYZwSm1pJthp0{xO@Ze>TReO-$Jz+gDH zvbx;Mzyc2lhfEVb zvP>XDHhbh4A(y=u8X?aJ`6gHdi%n1fg$8)q1ZH@~2t`I%!dQyg@1-UvfoIuhnE^@} zy37NXLpj5&V561n@f_3hJi}F(V1OB3U`AINU^T;6GJF*itu{gpd#^D-tpVz^`)fn! z!MT!0TgQYiB*Ox90d2H>HEEw@K>7d!)N7mVeb~ggk^GFp1=?kMm=Xz5n$L+INh(CZ z02PM9FcsERGPXbV36KqzY37QS?vI>8N78PEBE!ytXu-L7z4LwrfZ!lGb?eKRM zTG{AT_SgYCRoDf)Rd@~BRIo!Ty*d!}AKY!wf*$qg{W|PXVJ|pT_y=ebkk~OP52&yY zW^3LC#^t5uJi3}Dk0SxJy+pRO4V}D19_Cq?HzhxR-s0qpf_V}c=PjIDFfFri0fb<3 z`vU;_%W{BEBaSgI_#CL+ni5nNjX0KQ01FWCg%}*EoZky#VG#b3AP(XgKaPhZC*yUG z8n_q8E*#0gG`Mq#o?O~raOl#EHW-E=U1Ad;^vE@c^+46pZH-g!m!xRz<7u^J%=B|u z0OeqSdr-uHlVE^*;XZt}fCBfUc>_3LG>qWXqi8w-MuJf?UN`R^02A=Q|OhYa1;D?{p<2W*a z4XWS?92v_X3}SBNLjR0yB~9$OX_6#< z&H@-Eb%0sIN1`O%1o;-4O-C092Hg`O17_osm1iP`FXP*U)!ENCbT8U(#h=}4w6R+b zP0pxIFbCt3yhme={(rS@>88issmC4WGClY|kJ+`r{H`61Z6rxE4~WHDO7rvN%XdIl zjhPR&6=_C3NL>D%%9roB>!H+MNQ|DHdm_{DWX&9t7u)#c;)Xw}a6zxK&_f550Bw1V z7wsq{X04idw3!;KEldZEO_4*jrehweb7N&nPHY|I&uuAhRf#>L{=JK0u^l2DK{_P%Z)*B;Z7?)#um~_I8k7A>w5}rFp zYqaDS%0+WtF8cm5*6(GN=b)ij>go9X5$O+Z(s6sHbdG!D_$pm$1=6=8<(9w zK5tF5CpHb9^rtgQ0IkcE0v$dJqy#=M4ts&Lm*iw1amDF$EA}ZqFJ9xQNA2kbwN5J z%sz2lI?bsxzX|j29VDCY$p5X>+axduATWqFe=7$tmH?XGA$id4pGsbicW+63gSeAR z)yic(`T+FkLo<%*jE-Qd9NLH596L-lc(D3cb-r7(yqM6_9rA^NEFSu9q%xQX zb~94>RuIn(Qr3iU+@e(FYk`|GO*taM2u)Y~h2n&{ikKJ2*u~0p1JC{T*CUTy$2z~R zGIwn>zeMUqBVSN3CB@1wqS`N3DBhxmZr3Yw+;}ptR|K}z;9Ab0KaEdUtn|-XVsUJs z3YEC#{q1E^S~x&jI31NjY^oYQ`pB#F`J+)Ofv4;nHF|DfncnPj9HNnHmQ z8jwXM1~XLa6|(&u{KOdG7w-IPDW6ERMgD&Ll+u(%e1jOb%OS?ei^q-fiS$Gk8NyXI zzI^{H2{`)oZuJcAafxfP6$PTTorB!9%~p51w1TMIQ~Tr} z8iu2n3`V+0q}os-T&uHLEq9oq84J;c?}9l!Db|$5aIu#nd(wtHUF`3S@J198!sw5kNGTtY;e!$Br`3(|q$BfnH4_*iofFo6 zXQeb>*V4-HZzeDShQEP7c}4tt&dsgD@@R=E0jX3NW zwEUUw)Vokux2?D~_CuUv2JGO``PWjuX_juh7roV~&QgHt_GB^w9aTW0cmd}!WJ;0F z?MrtR=z;mlpRK^t8auJ^1520gFUoXoqS>DHrlvyO-Jn8L^=>uSk6oC^&-_!@dfL!E z2CU)i{A(${#W}GH>rHoJK-`rF;%+ONwRwX#EM!!!DluvSY5*^HCVz%_%hH2xWq$mjUVM6Y8r@P>uK zkj?<`-BNBnrfUtk>qD?HkKaGZ=)5VUUPXG4wj!j!r=65x+{U*2d1rm`Lt}8Si;bew zot!4^z~?=?zJmWUJ!j9*)Fs3=@Q&{=DdGEofMT3F2C**sdQtllGLmPtZV75QbCGEX z*Cw4e(9=u0&*iae^y{Z2>s`8o6RI1ca2a9Xx|Ar?UQAMWKwVAhofBXFD@`pX5%f&4 zZtQ9?>7(2Tm1ycNsW+{YB{v$gl=z0QWh?jCgofd0hXFg6cmB1MABofj?2=@pX;G3p zJyu3kF7?;=3!(q&2f!e{Qv}~FJr`iW=488U03TpTT-cUg-*N4XuW3Zw76%<5bj=ACk=Rf>iTOb&wEPe z&CikiX%sGaTp*@odh;S&UKm7m@nbxf7V84`BsA=~Og9|73=MBt&1u-DEz?UckD>n~ z&{5RU$$gZe&#&d&QM62xe5uz8qVn{Z&u&|K9veRf_qmwfLQ_l0gS?RbQi=-B?&J!F z(o3bfx!VjKdYq$UY5tFr7oA#0{J2PN8Pa|~*`0Q{O!xi_qYb#wMRTJcUBLMrU#`#Z zVZ+B4R=Hs}g5X+#zh2`~{pf@nlBFxcBhO={e|(dAmy?jY>bdKH0C;86_198vp_Xzo zj{7^k95t-yP3u<@KXHV>lYuBXUhf-!nb5okTW|)hAMiIry9w2fkPTh+!54{3V+6hF z0ogk|aT{dgK-cv+$v#&JtknG~t!(IIGbU!AJr*m}$8c1q&s`1Znv@@^JdYDxH+`0+M{*c9QyYDW2Z=Ip?+{_bt9dsB;vy0DkIh3aFT-Qo^`sF%dzXSUc$zKfosN|>CpL4;ZeP*O5f1C~T0_EV`xm&`T&&THB(oOX+)yvfZmuN}JRcu0 z?4Jz+i-%}_i9&64h&|p&>+3N0$4g{Sc6JuWGwb__ha1rxg9sP1&(OL$!mr2Z2TXqQ z0VW?xJ?cpS-=uN_O>+Y<6AWBCv`{*#o{Zs47w6vVyB(K62CgkpHEpifTcWS)QU8iF zbaXulq03Jw78<)&PkDTsSA00OQw&^}(Z?4S8SIPze_60yo3mM$c8;H-SGL#=eMwYj5;Uj8K){pch{$4&;v?5ZJVF#7GY|bQYiU ze&bFp6200WE-q*cMfnqUU27`wy6gM;$&|9v7jwv9u*<|X&?ZVo^Y5!09{M_eur=zp e3@SD1Yjs>BE>c4q8nF&@8%eDDiUqjP!v6r9VTDBi delta 7522 zcmai23tW_C)<5qBW?*Kx3CKlm0fGX8h?Z9{w2KO&2IeJ2(Gn3vLGccxvWm*ZS9we; zaV0NlhSEA&dCAS~D^V%aY(F*gt6kl-l{U*SlG<~g=bafwf8Xz$-|x)(%=w?&bIy66 z%bacY*>%{*k~LaU-2sdMD;MS0TVj>s>sxtZE$hUST)$OWsLtNOImx^$O(9(AClnnr9k>>&#+`wgqO zsg~YuLlw@MYwjjE6_)#gv&?&!%%M=Pn@E_C^X@QlPwXdnsszvZt16! zi}w0@0TA(tYt5OemNwp~1.|Ii&`Am0FU-JmBFxZ~G6caWiwzKV=6pPt1=SYU*O z?oa}y?yv|J8=%Y`EU?4~OO3FMM3&R<74EPSR?(>302Rcpbc2^+HF4I^Xf1tJk)1yf zZ=E|NSYSOltv0|0;%_AWCU>ZTTGFpGLOne<8$dI_mX@|LF0x@Rx<~shS2K@E24wd# zz*gDbftvVTS}R zr3+`|6-Z#1xp012$?Sy&c-0KA!RrRtX@*AFrPZ&E>1l@E@JCd7Oad^&ZfG*Y8?eU! zd(E&9_M72NI3Pio7Lgg&&kP6QEsWj_hhU-^-iBr~9H!9``f7o9%-Muf#yK>x*%ldO&S6!B{evpE=LM==A|!T?XeK>Th31qQ)j99dyF4B==i#KTY+21dz%L8fvZNbxpjI1 z!f~tto}FZXQR)Nefvl0p>Lz*r0)~L@IGNEfhNgt7eeg+1Rl7l0Gd^WBW=l*-mn5q8 zBxXS7GrBV6g)%Nn5381S$*F9cCd!V5EUtt7g1_Rm=O!J)lzL1a%Tu}yYe!Be;3O^h zHip4-IP!q$@C;<*$c(=B*{$v4X`$fv@t&Ya_CXG2Md2e#pUOu=VIsJI3Ezh2(TXeh z!WNi>BLjrM2AGT^;|PeticZ1Sa>J%t2~%<8j)7#u3kaKdnp42_cZ}a;fN5CA>6l^6 ztO0T*N&K9yg-`LDIRjm)I7vUGnUIIylxM0b1!##o3XdIyNS~;q5Pc9CT@${oOV%v* zd0Jo=%m&G8ICi4Fe}_5B~5YyZB8^QYpz#J$XMY<#qD+@5k$tj!0e`I`7r{Mx4r zI-j%v5GC#RsJ_KU)?|3 z*pD*|oAUH6fnD?@4!bf!j?^04+_d?7-MGl|y@~#O@YeASVviVn=C&BboZHXsvW_QF zDe1EN=F7sG<#MUmkk))Pix1eVv1%mqT%x*l+1_cEQP)oMh+1yn{ulSBeYP=jYlY7swVis!`^RV4c_~|662o z%t85pQ0O=&M~G~No|eDnML*gq7x3~58#k_^jn~iXHV&MZ`-lWxy(kBZC_8S->7r_B z-^w@Kd8$qix|u%)my6f-PyD{)s?+} zLir;PBlo0oP8di!qr4~_z0#`uC^AxeRk`5B^FL- ztszYEkoMy=&2y~G2!xl6I?{|d3T1l8P~^S&OT z#*5r|WT@wav+G&vX-_`CcK4C1eQ+o6k=WsSoL~3_#m{;pSA9%acA2RT)i?i)86^rs zvVEE=$Ew=!WM8daHn356i09Hy=X;wdK2h3*r0KXH%AFscs|L&44K@?nA)}Ffw@f$V z_SF*qPTWO^qMghHyV*3@umUxJo0?RBrly!Vu*vMpUN2C^oAed|3oPu1d1^5Gu0Z9h z0_z}dfj4oZwL{)L*z@yLGmk#D~ zEtqJKlXy03pQk>{gBwza-o^!ydc3wH$jlzB)1zx7WNe6tqcF^u{amO=XevVF+i;Gs zj3PCPyPKYNx!YAt2oW8IY@epe1+1<}&Ef8Dlj@NeR$rthXq_?L;<83dvacd8f)wIn=0RB3+AhQX=4ZGqm{^H%xFY!XZ7tz@;)%3HKJrE^=Tv?rv28> zlYLR7*KzRdOH;EjT%y5F(#M9_q&1hJo~*f8Ps+(+^gJ__gIQ^=$$j$X%_u2(coNgz zs6JMMYZMV3QEi{5%JWZg)!L^`?1cq-6gHHe{`MbU@cr<8qqWcS1L7Yx?{rzSD^QNE zas|4#vFIhb`8tn2m8)=qL_M9Plc$n-&1mPW3COQdJvwa$j{<@ zmR71pbAR&EhTk@L0ua%m%l2uiJj!$n!7O8;Zer=nXkz1D4&XV5b(X4pJ!Gau2z(@f zw&O-^{Sh3to!GmJ_5AG|^+QG|zM4eN&6i`7?&WB_q%t&f?kl^Qf-*IR zI}X{3K*}{94V}E$g)%(_*GZ+&j>K~DK}(RC{12PBw-)jbiz92iAd&Z&R4oOu3rlQ< zNN1AHEq0RvSi(}hxjt6?BNMP1L~*uk;j^jo%b&RwE&PFnRiQZdzGEp`%7|j8ma9Sh zD966EL3ap>=q;cd5welR;d0Bh(v~5#GhRUD@lqhGTc(G$lc1wx1=PF*PffH)bFB`7 z?oK5r%n`xp<;eX!S8$UCNCsB3Tra-Y4@Z~%if=5D^YU8APA*q7xmB+fsDI`q(vN48 zR;d2GHxrgm-n9VVU83GjX-F?2(Vi@^M8Zveg&y{&WaYtnf`ZtMt&$saU#UZZD-jx8 zEugHGY9x2?V$Af~0=y6qIXBA_n%znWtF}%4YqIj7Q9x~nv6@M%^pfQ)dH>!$+$xFs zImMdNEacN(k^KITl`Y|$&!7wsszm8d%1hgXa(kN;*-)=q0_ME_dYK24QZSa_wH%ICNJ zr!l+!ge4(*+$Mp~rpkf+SwtnaNK!v+F?(6~VvwFrke}w|?&kT>($2hT*;;8Ah;Xzs zU(g~(lBzGu&613d=zd7LlN=J=ncHeTPob+38jvlNy|VF!;kjC0hPp%zf%YG?`xNE~ z`K*ojGQPc9Uq6qNsU{uD-5~d5zH4+Zz1ARixswhpuuqj60MRffrI${3QOZ0mhdHv-H}B?~;kZW=IXB$1t#+jVb`~RZG`RJ= zfcRZ1)OK`BSci$b=YV`z@j7v)XZs0?{=%;1!*1XW%+V>P^@ykckH9PbEeElj^}53X zg4Ta)SB+rD5#keeT*@AP59Zw!@^nvQcuzC2w=}&S7PtLWT7{b*(Ez6`S?+aFD*n5y zvZ`u5h#l1!#NY4R@o;v>21y$bYVEYg7r;(*V*6!n&|mxMh)Y&>YJ(c|$g#8fFE5y0 z#x;%TX{T83kb*e825v+L7xCYK4QfBOqehQt6>(>cBW^7Fb%Nr>E^bu4d0Hm)-f3Ek ze;^RWIaz&XBFTrc_=$=qd$&@LFl`f>^3NqGka^5f+*!pY{q%F)KfC`K>{+51CsT>D zg#7hQD!;U|PO>s^q2LBARLm^3Mt6`|gHXd#0i`XLx;v)POwiRTJ7i`zOp=-XhSTJ8 z`Don0bN-keBIlF!-Fl()^=8G7O{>-A?^X3^*@PDrBIjdgHnYpSv3IuU^2aZfOVgtP zi0FT1+o!3r=PN?KvsS;Gh1OxHttSN6c7@g(@knt0wHP5$g3Sazn=1F8>>}TIN^xb! z>hubpCwoiJ2)ua@u6DU+6kirzuV*c>9`T#40uR5bc(cXzy6Ogku3WW4-P!r8X!^QM z^?SnK{N9ecvEsXmn?rxpX2g4R2)tsm8pU(HtzpZ8>G)Ok_0a>=6$GD1@W`67`^P*V^=k*v|i{eZZWLhM|%VT-9(qs@9=Utjtdr%b6bv!B6)Fv|6~jLn}D}I3O#8V?zP6PdII_J^}ARB;ok-7 zBe$L4yb9M{7!J`8r;tK>3AOeV)xd7V;lC)kajK7_rQ7Ie9N6b!p>7R&%k?zHZMuc) zI+1hB#g4Qqnb};F=gBN@KzkE2?2v&C%}~v31s0o^?USstPp`t;36ZnCm~mZ{k|wGy zERg9&`!FH@}>hF4G4kiiy11X&@ Ay8r+H diff --git a/settings/repository/org.broad/tribble-41.xml b/settings/repository/org.broad/tribble-42.xml similarity index 51% rename from settings/repository/org.broad/tribble-41.xml rename to settings/repository/org.broad/tribble-42.xml index 6ee8bfb78..1c03ce1b1 100644 --- a/settings/repository/org.broad/tribble-41.xml +++ b/settings/repository/org.broad/tribble-42.xml @@ -1,3 +1,3 @@ - + From 110298322cbb1ed8cb1e191b5d00f3d145628a9a Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 29 Nov 2011 09:29:18 -0500 Subject: [PATCH 3/5] Adding Transmission Disequilibrium Test annotation to VariantAnnotator and integration test to test it. --- .../TransmissionDisequilibriumTest.java | 102 ++++++++++++++++++ .../walkers/annotator/VariantAnnotator.java | 6 ++ .../sting/utils/MendelianViolation.java | 2 +- .../VariantAnnotatorIntegrationTest.java | 11 ++ 4 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java new file mode 100644 index 000000000..3de179365 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -0,0 +1,102 @@ +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.samples.Sample; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.MendelianViolation; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.text.XReadLines; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.FileNotFoundException; +import java.util.*; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: 11/14/11 + */ + +public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation { + + private Set fullMVSet = null; + private final static int REF = 0; + private final static int HET = 1; + private final static int HOM = 2; + + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + if ( fullMVSet == null ) { + fullMVSet = new HashSet(); + + if ( walker instanceof VariantAnnotator ) { + final Map> families = ((VariantAnnotator) walker).getSampleDB().getFamilies(); + for( final Set family : families.values() ) { + for( final Sample sample : family ) { + if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now + fullMVSet.add( new MendelianViolation(sample, 0.0) ); + } + } + } + } else { + throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in."); + } + } + + final Map toRet = new HashMap(1); + final HashSet mvsToTest = new HashSet(); + + for( final MendelianViolation mv : fullMVSet ) { + final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() && + vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() && + vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods(); + if ( hasAppropriateGenotypes ) { + mvsToTest.add(mv); + } + } + + toRet.put("TDT", calculateTDT( vc, mvsToTest )); + + return toRet; + } + + // return the descriptions used for the VCF INFO meta field + public List getKeyNames() { return Arrays.asList("TDT"); } + + public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); } + + // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT + private double calculateTDT( final VariantContext vc, final Set mvsToTest ) { + + final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM); + final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM); + final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET); + final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET); + final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET); + final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET); + + final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); + final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); + return (numer * numer) / denom; + } + + private double calculateNChildren( final VariantContext vc, final Set mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) { + final double likelihoodVector[] = new double[mvsToTest.size() * 2]; + int iii = 0; + for( final MendelianViolation mv : mvsToTest ) { + final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector(); + final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector(); + final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector(); + likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; + likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; + } + + return MathUtils.sumLog10(likelihoodVector); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index c9ea7a3b5..143f2eb2e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; import org.broadinstitute.sting.utils.BaseUtils; @@ -196,6 +197,11 @@ public class VariantAnnotator extends RodWalker implements Ann System.exit(0); } + @Override + public SampleDB getSampleDB() { + return super.getSampleDB(); + } + /** * Prepare the output file and the list of available features. */ diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index cf45dab79..d0579fc25 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -77,7 +77,7 @@ public class MendelianViolation { /** * An alternative to the more general constructor if you want to get the Sample information from the engine yourself. * @param sample - the sample object extracted from the sample metadata YAML file given to the engine. - * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation + * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation */ public MendelianViolation(Sample sample, double minGenotypeQualityP) { sampleMom = sample.getMother().getID(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 1824789a9..ffb9aedcc 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -168,4 +168,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { ); executeTest("Testing SnpEff annotations (unsupported version)", spec); } + + @Test + public void testTDTAnnotation() { + final String MD5 = "9fe37b61aab695ad47ce3c587148e91f"; + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1, + Arrays.asList(MD5)); + executeTest("Testing TDT annotation", spec); + } + } From 447e9bff9e56645458cda83acdcd092cd4e54180 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 29 Nov 2011 09:57:45 -0500 Subject: [PATCH 4/5] Updating VQSR tutorial wiki docs to reflect the bundle --- .../variantrecalibration/ApplyRecalibration.java | 5 ++--- .../variantrecalibration/VariantRecalibrator.java | 12 +++++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 1d5493daf..af7e6e991 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -57,8 +57,7 @@ import java.util.*; * slightly lower quality level. * *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration + * See the GATK wiki for a tutorial and example recalibration accuracy plots. * *

Input

*

@@ -77,7 +76,7 @@ import java.util.*; * java -Xmx3g -jar GenomeAnalysisTK.jar \ * -T ApplyRecalibration \ * -R reference/human_g1k_v37.fasta \ - * -input NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf \ + * -input NA12878.HiSeq.WGS.bwa.cleaned.raw.b37.subset.vcf \ * --ts_filter_level 99.0 \ * -tranchesFile path/to/output.tranches \ * -recalFile path/to/output.recal \ diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index f60a94a22..21e96b4a1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -66,12 +66,14 @@ import java.util.*; * the log odds ratio of being a true variant versus being false under the trained Gaussian mixture model. * *

+ * NOTE: Please see our best practices wiki page for our recommendations on which annotations to use for specific project designs. + * + *

* NOTE: In order to create the model reporting plots Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version). * See http://www.r-project.org for more info on how to download and install R. * *

- * See the GATK wiki for a tutorial and example recalibration accuracy plots. - * http://www.broadinstitute.org/gsa/wiki/index.php/Variant_quality_score_recalibration + * See the GATK wiki for a tutorial and example recalibration accuracy plots. * *

Input

*

@@ -83,18 +85,18 @@ import java.util.*; *

* A recalibration table file in CSV format that is used by the ApplyRecalibration walker. *

- * A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data. + * A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data. * *

Example

*
  * java -Xmx4g -jar GenomeAnalysisTK.jar \
  *   -T VariantRecalibrator \
  *   -R reference/human_g1k_v37.fasta \
- *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf \
+ *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.b37.subset.vcf \
  *   -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
  *   -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
  *   -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
- *   -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ \
+ *   -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an MQ \
  *   -recalFile path/to/output.recal \
  *   -tranchesFile path/to/output.tranches \
  *   -rscriptFile path/to/output.plots.R

From cb284eebde82268960c123ba3726a91b4089a034 Mon Sep 17 00:00:00 2001
From: Ryan Poplin 
Date: Tue, 29 Nov 2011 14:00:57 -0500
Subject: [PATCH 5/5] Further updating VQSR tutorial wiki docs to reflect the
 bundle

---
 .../gatk/walkers/variantrecalibration/ApplyRecalibration.java   | 2 +-
 .../gatk/walkers/variantrecalibration/VariantRecalibrator.java  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
index af7e6e991..62fbad4b9 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java
@@ -76,7 +76,7 @@ import java.util.*;
  * java -Xmx3g -jar GenomeAnalysisTK.jar \
  *   -T ApplyRecalibration \
  *   -R reference/human_g1k_v37.fasta \
- *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.b37.subset.vcf \
+ *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
  *   --ts_filter_level 99.0 \
  *   -tranchesFile path/to/output.tranches \
  *   -recalFile path/to/output.recal \
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
index 21e96b4a1..520393898 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@@ -92,7 +92,7 @@ import java.util.*;
  * java -Xmx4g -jar GenomeAnalysisTK.jar \
  *   -T VariantRecalibrator \
  *   -R reference/human_g1k_v37.fasta \
- *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.b37.subset.vcf \
+ *   -input NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf \
  *   -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
  *   -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
  *   -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \