From 08cea205d4e647d18f1ecd190c2d801c42e14a83 Mon Sep 17 00:00:00 2001 From: Brendan ODonnell Date: Wed, 7 Feb 2024 08:36:54 -0500 Subject: [PATCH] Rewrite dup as ins and try both shuffle directions --- src/hgvs/assemblymapper.py | 52 ++++++++++++++++++++++++------ tests/data/cache-py3.hdp | Bin 945102 -> 986878 bytes tests/test_hgvs_assemblymapper.py | 41 +++++++++++++++++++++++ 3 files changed, 83 insertions(+), 10 deletions(-) diff --git a/src/hgvs/assemblymapper.py b/src/hgvs/assemblymapper.py index 3900548e..78f35eec 100644 --- a/src/hgvs/assemblymapper.py +++ b/src/hgvs/assemblymapper.py @@ -13,6 +13,9 @@ HGVSUnsupportedOperationError, ) from hgvs.variantmapper import VariantMapper +from hgvs.posedit import PosEdit +from hgvs.edit import NARefAlt +from hgvs.location import SimplePosition, Interval _logger = logging.getLogger(__name__) @@ -180,17 +183,18 @@ def c_to_p(self, var_c): and (var_c.posedit.pos.start.offset != 0 or var_c.posedit.pos.end.offset != 0) and var_out.posedit is None ): - if self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand == 1: - normalizer = hgvs.normalizer.Normalizer( - self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=5 - ) - else: - normalizer = hgvs.normalizer.Normalizer( - self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=3 + var_g = self.c_to_g(var_c) + strand = self._fetch_AlignmentMapper(tx_ac=var_c.ac).strand + + for shuffle_direction in [3, 5]: + shifted_var_g = self._far_shift(var_g, shuffle_direction, strand) + shifted_var_c = super(AssemblyMapper, self).g_to_c( + shifted_var_g, var_c.ac, alt_aln_method=self.alt_aln_method ) - var_g = normalizer.normalize(self.c_to_g(var_c)) - var_c = self.g_to_c(var_g, var_c.ac) - var_out = super(AssemblyMapper, self)._c_to_p(var_c) + var_out = super(AssemblyMapper, self)._c_to_p(shifted_var_c) + + if var_out.posedit is not None: + break return self._maybe_normalize(var_out) @@ -287,6 +291,34 @@ def _maybe_normalize(self, var): # fall through to return unnormalized variant return var + def _far_shift(self, var_g, shuffle_direction, strand): + """Attempt to shift a variant all the way left or right. Rewrite + duplications as insertions so that the change is shifted as far as + possible.""" + normalizer = hgvs.normalizer.Normalizer( + self._norm.hdp, alt_aln_method=self.alt_aln_method, validate=False, shuffle_direction=shuffle_direction + ) + shifted_var_g = normalizer.normalize(var_g) + if shifted_var_g.posedit.edit.type == 'dup': + self._replace_reference(shifted_var_g) + if (strand == 1 and shuffle_direction == 3) or (strand == -1 and shuffle_direction == 5): + shifted_var_g.posedit = PosEdit( + pos=Interval( + start=SimplePosition(base=shifted_var_g.posedit.pos.start.base-1), + end=SimplePosition(base=shifted_var_g.posedit.pos.start.base), + ), + edit=NARefAlt(ref=None, alt=shifted_var_g.posedit.edit.ref) + ) + else: + shifted_var_g.posedit = PosEdit( + pos=Interval( + start=SimplePosition(base=shifted_var_g.posedit.pos.end.base), + end=SimplePosition(base=shifted_var_g.posedit.pos.end.base+1), + ), + edit=NARefAlt(ref=None, alt=shifted_var_g.posedit.edit.ref) + ) + return shifted_var_g + # # Copyright 2018 HGVS Contributors (https://github.com/biocommons/hgvs) diff --git a/tests/data/cache-py3.hdp b/tests/data/cache-py3.hdp index 9cb5277a95527fb2e447133d88dd8f64cc004e9f..4316c6cfb65d1568cca9517e36110738f449cdab 100644 GIT binary patch delta 45560 zcmdU&37lMImG*mIlFkYVtAOkb5V|`%3fKk++_FG1IwBy52Et{}0=QraL{MzENoE=q z1qEDi*hB?ER74cTVO+pLv9Tq~mMj|^8(YTa;j*Pm>j}muA5lGiczFKk z=%P9E){hTN?E0;qxwU$;d2rI|m9ITx)$3NCa@Okg)y~f9=(OY3?^!*%vvOJG*2*Q7 zLn`N1-d$N!sa3MdF_n8NZ^M4LvIl){s=TgpTjlb~ODgLsCssaD8LpgO9h|&;yyuXm zhaRt^hDX6+%5AI6OLpS*na@vD}fy&fT6 z_2AYW9ZQ!jS+aEL(j{X{m$-2h$L#2cLq~Lqal05=#y|S)TS_}Qj~Q$fLj*9g6#3|= z*_VbmiU;|aFFcJm5LmJhr)QC_z%hjWh+|H|m%S1A2A|*tr65W)1emp@W5O*RtI5P< zxXX!|wBO_CT4p~GrVUEr4G~A}W#T>X5j#6+=`LcRMHh{wEEtO8hgBojv8A;jDJNEQ4BB0{?LwT738=>rh885uJ?36J_}6!Eb+k6zloz{51?ZyQWGO8 z6A)lP5yE`IK^Kw88q37PlvvilS}P^U03aVRtO;lxme!Q9s5s(G1Xs#Uq|6J(1{8&m zTlB{)fEyJ%Ux^!3l*Squi%t$WBCO!=ku=C}8DXq2Ax5nRSd*IQ>JSf(0CRPuWOP`c z^Aj#)6(^wRH!8yEAdUSYS9nu@WT&DC0Lo;*o7*XJ*a?$ZD9Rx{QE)(TC`Cy*4-DuT z&XGH$3N2#DQWLl=mMp8eX3WSaI}S-;HK`uYi?sUmGDJZ$d;OfaS*vEk-bA96GyCnHs}!tBeGkHdPM-w zf8_EfF$^6T6P%I?AZt@Vk>Tw!lsw>8#3nP6&`@L-i}<09MMh2uh(nW~U`bbv1G#~$ z^)OV9Ttc{6K}c*4MaG8AhQ&rco$7@oW{yZ#0R0nfSA^^Y=llyu6|w2gSURmQh_jP& ztTP;JI!AT@#f}K)E7n)i!4Zow0ke&B_=reqoTF$gzyD(L^pO9zbWD6lo*Q(zklg6hEV6&tzHdW5|Fh9mNkXUg@6R& zPmdhw1KU$fsEE-!^5|C_m^^scS2MtwkEBsUfLRDFLiCPPiz>j7lk*V+EVslqAjsq< zAs?>Dc-YS)5!Ey5)L{v~|2!W}6-bWJ9i>eN+#F+)A@mc;8H;ONlB{(^h#C_WU2l@QVp zCRK%c97;mum{FRkyi^s=rj&TRq5>n;Pa;{vmDe9ex(HdYl3;U2@G#g-o~Ci?bmqsh zO|P2z+pDT($1k)?Xaq%Uh)#KeM<8+F*P=}>3HcB#njo1{a^OnBtN|whA8ayqU4^o? zqRZN7@XsRg1<4R#q_2|Z0?gO{F=N&t7dmw`-cn3YBayWeOdXH}P2EcdJV9Mwuo5pa zv%{g71&$pGeso(Ve$>x^PZ%;}lsOSg1{Np{b3n$dP&ACTysX9ysn8%11nk2%Xqi?a ztwEMS(>`66@%oc}ilhmg<;F0HgyuMsr+%E(%L$4>s|fEPD?e#FWJ<;M*lA5!LK%m2 zqSNOeR*oRHBQP1~Lh%`ONDa)b9*R>}Lv}IlQ%j_k0)Dy~%VfA12r#YZ9}mw4d*b)6 z;wvOU5DGdmw{}DjZYecw-1OupbaEs#FoU%kzd?BM2w`K`Wy+Ys*dL(&{Ua5EOhL4j z;uA4sXV6%OMG+k&VXZGlgj5o$Pemvt^;o$yjnh*f38V^Xs06S7bQG+#DGp^BhZ1gr z`NI|f?4mB%(5v=!7o8JJy+e;Al~|IXkCN0yR=obUMeR;3m_-o?t7M6U6)~tV*t82N ztV%|K2@}R(ElPAnoMZ^0!wNlOXBCce{y{SM5=Tw|cyJ^v#$uF1XSp%NaA*#Hd<7$wA!!N7oHN^J7r1f;25!KpskCqD3{-T(_gr-hLy zx-fPDpqxg9Tv02Ih5iK=4DbkH3Skv2s<%=eu|x}{RV(Hna%vNyrV65Xn*W&hhJ&leFhE>Mp#((%s89sR9!X6ANa&Q(DOV&^a77505QDsKx|^Ti6d9Nb z1~t5S{lS6DFZL!wBFSK#8xVvLo$f@5y+csfu%!ZGx@?8Wmh0@rRH<*!DAKqKCh__^ zY7njp>%8JSGE!9E4C#FzY-xs+t3%CLoUT|;!JEn`$i!J+?H=h%S9sRVcx;<>`#wB4 zG`NZjE0Z*nS53Tcr1Q|;xF9s~;6t4|S7+HjTeB5r`HCI)I*O|`(>bnmBc$%(2MZ&F~PgHUBXJ-!c zT{OF{TKB$>zi!3`*n9_jA6_}}vRgW5ulsV(yJ1h+j{L#NA{vyF@N=vQ#-QR_nzL7-SN^xI+oY3N6jeiB=gcU%mE88VPQa>6)ZJjd)ZL`~Fn5#l!`%(#nY$@@#og5WCGMu>)m0t69hK?%5fd{i zQ@4CNg}RWZ`pK^B?Wdy9i%tuL7R_r>Xde~YT7^DSR_Is~<;Q7WqdDHZos~Ak`~(J? z6l&nr0&tU6=+0etsC}}lI^3O|ejfV#2Ez)P+q%T}RiACt=c{FXPF9r_no~4627S7b zy9qHrm4POG%6K)-nT@9pi}TN|pdN~;uIbM1xDt%)Bt}gG;T&j5|28G%X^Kk~66l&nr0&tU7s2_Q%kM(40E=Qqx^w$neO&<8QTR&0Hqet`<@p+dKp73y4#E?t^#O%Dcz+7RCxg%*Or=g|5c%Jl%Sdy-&RR}ix#vzPamK{d#TV(Wremt9Rp1YHSlTyxJfUe zEs>}C;>p>^_RPAo7w;7c&2LrcKo$Bc75Z9Pp`F!b7tOAk-7qNBhM4cpK$AiZyjlQm z(hAK&p6a_NXLsy_LWBFFQ1`-Cfeuojy%p%TvOxPP%N)&qnqdqAwE^ZM3^WN;!rSyu zve~Fpeg07P`y~kUo`Yiv&1?Oe{zWRZj|zRhtk5ELS*+P#^8ySCwISv&WS~i*23{=y zH|Zs`74lRs8p>Xfq0omgJWsd2S{zfLeHG{{Wr3C`%fXsMG>2jks0}b*%0QDqCA>|0 z37vh=4z-Jhs(Vk#uBswW<%n273tIJgvHHwWpBu{h9HA;lYF?^&83ui}M(rlV{3r&R z^eN+Q@_&$RLowB3ret3`5{3Q?Gu}O~RiGsbw4VaqRTk)IWqGyc7|pR51Zo4!k7J-o zpc3Auy@334V_Vdz-ZLe;|0o0+dwDFO;ntTz2dmJq3Vp7u(CgJ@x#lFz$ru!BL(Eq& z(4gyZc&CH+ zl+`P#Pi?Zvt@0#=x%lzFMqbRa%c5@gM1*B z&#b<=asi)ceM9A4m6IzsSB|YbUb(h%d}Xi7TWFn7IjZuq$}6fzcKS)yS06Uo_$2E^ zE|j&Jx4Bxx%P+QhN$2W`3s&`>RkPs-S?jXA>Z(b6hSeK%;s!tD@jjEd*(7ev@g|RS z!$;glg4YHQZwcgn7WE{+yCJh<3gg{0dYhab7~DCd3(oKhSCfY_;RVxz4tLT~N}$TvhAR;$VB?qs z#SKO*F}Zjbk%S}z_d*iy(jmqQLiwV=2oQ_SG~VBX6OrE01W)f|b~3ENJcNTRXVU9p zx3~Y$>0BBYHyY`iijsJ6Pf944>BvN|NeBQzHwdzmfesv}PjU)iWvL<=(E=Pf*bFv| zXd|tGV|wtDg2@F8|L}|;dV-b}aiF8rZ8$M{)9WXRcp!N?2rI5Vp}09xCJ)fP=uk5p zbwjD5GAW0@=K}J9P|S6bL7k&4*g=Im8F8aRQ_0FY=m;mOa=KDNT$RLpZmHPlOxX*^ zm>-v@;?;K(_oOM*v$60Sv`IhFoHywipi4u!}=nBS|7Gub3nHQ&N+# z@`0d-K}HEmAa=+(?YNVWSW;P-_3+OUt(y>z!;cQ)V?GIFP_h;P8n6VrB7{~URU|w; z6xP{5?Z^@;n%{2MKUIb1V(4gokLf3hAy5f2sTp*Nyg~#9OccTtggTE#E1m2a;8=Po znm#99bhh+KV$p#xP0C(yh!)2w04~gCKXb+1VPoqv_{3K^W=PlC*$VC3kN_{%p(GwmjugrNadZxIPyq^ zC0opvbt|cPgF8Vg7qOs)iq0*#I$8rf)mQ0ti%E#mY(hpSv7AsDvi|gm%G@asOl8El z?rIqLR^k*`b?VVk{!uSL$<0jZn0_laD#}iCBxB;icmDB+HZa7AWdh|>MvhWGEkt2w zmBto`%58y|7>HR4IV7&LKu#eCfRQCw#oTfyEjg?{65)o6w1_i38w08Z08TUn3s4vt z6y1VbGJa8p;>m&W0D(kc6B%93sPX*%$HOo(8%hJ$#M)SASqPkC1xereD!@ZVjtzB6 zluZPwh;Is6sF6FsoOn4A<1F#~ZxaAJLY`DP1T8y8NuRR2(ufz>5X*N3$SqRFlu0^N zvQ%IYGzzighC!3XR60xel18nge4H&TY+8K)I1g!2IQg-uyHFM>Ce1jr zAZxIru&c`tKI=~p0^}$to$|EHNnvkGk&3u*L+Ix`?=}Rx!Cr;b8@A@mEg#In(mz)6fx1m}j8=j_=( zBX#<-=jSr{h33K4-oQQksWOeMnz*xn$`&)@uc#(yZs6XgmOaVF)rVz+)vG3D_kQ?i z1KGA${%n9-xr_T;XB^nsk^SO_lRL82=VKlmSkBF^uY5tr^7^gZzmmY?nkO{B#ykyZ z{u{>lGrj~lRK~pJBd;8o$=%%kz4vrXXI7safNXZgH88ykGv2)@z1h$}T_xx*B)Ysz zbjlj&P1Q`(OvfamZJ7BC#+rz(I=|M0S-juyTMFXb_?^4X{D0ha<-d2=o&UjIPyR=D zz4?>w`tlBdxvRU=-9X;uZZPk5H!1INH#zThHz_VqCUH7cGK*RNlLY0=6f*K zq}17=R0DG}P-I{TQvQ>I}>JiBj!Vmg-;y zIz)4*W+^5~)drd$##ob7XM|J@%*{Zmq3TEbP%C@i7m?~V4A*N~pVLoQsUNGQW5PRIRTQPE)BL zsnii=rCzN*$7qh#9EV9twPEJ3VXR51(?Y2R=4PPOr0V;75i2|7Zj^cq<1FrRa&p8Ye`VD%F{sFmISb)>rc z9;F&>JO6T}`hikSDob^i0-dc{sab_dQni8Rs~KyODi5g|n45uA1J#%GpjP(I`;h9F z7|y@e*Hovd)b~|tVOgm+sn44==WE`ANlLY0<~7EelsYBOzXs-WuGF)Crs}W0zZQiRSH^cVLoKZJ_z3j5SHMA{JEx^EsF5*`I%X z)zNO$%C>t1sV>CuR5hIL+-y8movc*%D^*sO>U|3IH=1>t^_V188)*Kwj5SGha!A#{ z+zh1ZtzOfGTG^5(km}nQq-uMrI!UR%r&LFlrMg^!KBW1u<_b)bstq*1lCdVKP70|S zn45uAJ=J}>P%GQntzJ1CaIQ( zR1M6{K&tNQEuE;9y`l5x1KFb(q$)m$*f;@SuT6r3N|5Bd#@jp(URJ$#^>zhb){=Felald&(qSq?XeM&T~ zEYbHA=6=lsn(t$hM7(oZDM98xV5CW*7Qos@P^c`BFZP;mvhq#!Y#ZLL&bGre-Sjiw z)wp&NZoaGC?zZCE;wx1eKKr#n%}Upw_=+&EL2+4zAFgfH@2hHrZr>K#)wsbc+~k?H zxs`A5YPb!zpw&(spXEg%NHp(+z%Pa4$?lXP)c<+*fmn@vyxWDTW7wLa$9(BIO${(}&E6nm0>t^kE zYV9H0O}BV0*s7z0i)>Dh7eT#v7*~CxUi+dI7Z1}FLgjb)MuAONK)nnaDtmntchJ408oe&l&@-}Fej27e6M2UZ zF+!y}C^|8V668Tgfo@KZp+x>DL4iDhER_SV%;KK}LT_M0M^Na;mqe;sAR57Ul`%?J zOuoq^;Dud!f{f!5p<2bIcC`sNmG+-gYk#@`tX3*SNlrLW_@gYD3M^%&S24N6C3G;C zIfEoa(Wy!vn`))0CR{oHN;vcIC`us>WN^wq$O^W z-7(ie%r2qk12hn6C<(k|tYpy;IqtAw9EUVh23mpZGcctZU8ZzMKCp9L#)(5$axjZI z=&$>-w-!@IYSchWO1BLO@33HJxj1se@% zh-wgbm^v<}L{UsDiKarDHv$0(Pe)Y5IE%Df-K`ypdh^i5=pZ0dr`&1OTW4_nu6$A4 z$jM=$HG3)B848#n#v`LdLmumj`czBS#%qO-HgP&R5FK{9B=pzC_2<6?j+3Q266(o8 zMidbeGA0~5wGF?7PK>Uib3h6~Xvb7>EH!o(g&zY+7tcS?lQgbqxdZ^-5l*1;JD^-i z2k3eZ$rwFN z${XdIDph9Yn0EG|BGwu7yTYRa5m7j#ls!MvbOTr%7q(52qv~h{FMQ!(uYAPAU!OE1 zK7(1Epr&`KB+WA9|H_~plcaB)L#7ohdNna6(NdYjMkdM{a*`?F6VH?iYof@CtFr?& zQRG1*B;ol72nQ9>sTNeFBoD=yWseArau~xKoU{WeX+~6Ia||deG*3(jU6Baj7&8xB zafi7s(PmZ<3CtvD)R;!d6y@qhPe=&9&6r8qT4+&VT8lo%(hC#wI~F407l~s1IafB> z(VsL*NUAH9q>J-pig=hd$r2s_BzQy;O(N_Jbk22l90K*GqLS3_%2sZP``Oi5&(<*B^tSl4*mKfX2$)asLUxaq1vl zS||k47x0c-okbH&RbaVj=m1a0fB`ugxB|oZms~B>2T=;5lfbdgoa=%+JnClvKGLM4 z@D)!{T*)S-+xe1@b*Wn@B?Nsq@?F(NyOGMB1K1Etn)X_^bH%YJ^rdp@mVULjYN9>@SbxrziIXi_Rv-_xi0C+`p~PT`XNcr9HrN1UIGUu5P43}qa(rWL z=x!*%y#A(Ujp(u#MSelrCCj<59Q5Mm2;~{=A<*F{fIqbTahFI%IBj{Ptd2U#0&)KH z>NQFDZ zUijin17BeyM17PkrNlB6QiPU(5~X*NCfER`n_-Y~z^wcep9JyyR}2UdKtD&p#~3SM zBcZgNhVZc<0PSOQ1V4@}LkdA%Hax7S*y=?P2cr~0{X{$CAoC+!;-M@ACO7uNvjw6E z_Q=hdl#FwSb*FJW{os&l5#5c}hKHYUv&oubeHkwqB(@044mgtsOdQY`Gmr#`lF6mM zZtgsaAN?#V*wQDja7m-y;V#YSNiEF%Ql~@G1t7kP=Kyuk%z)Igw6@;z?h`fDK_COsJrv zBWxuXY0=;8VU<-9Rxg#b8pN}{)CmB{W=>3$=%qIsK(Vx9iZJQ*Z=lC$5-|A^9YmZp0AOqaEJrcP zk(gxWquT-<3gP4y9V}Lodu+Jc6{I7d|5stOV`39OLE;Ew3SBU9Y-VB7DNsBU1h{1d zkD|m_$WEP0#Xe6%xri8aUDxRowjjnsismszBr-CM5uOgCE<42lDDXG<2%`ohGKoIK z#i1)*6>Ji0Jb!=FNem%$m|T7Wz(R2P_%X(c>DVxjAxKn{V&N-Ph)qDmVJIoG6D)+? zaWagh|JryyUs2z93%=>|C<_-Y8lFFAZvBcx!>jl>$`;$saXbJJ%lhI-E7 zJ^fxy-#LCX<>20Mo#c%yK>AKC(%*50`ax$4?ZpI&S*O^Z_hYzZ3 z)d1OU;`ljzgS&5I)V7-KFn=nvL3P6?Fh(Xiv#!~@?`-%knqB`4#+Hq8{S?Q@qQxVl zb0!}DO81t#?``}(ntlHaNQ21aBltWD_r@+B@$)FNYevu4Ecg=;sL}Lsl$euX|0h0< zQe2gZ%P%dLdRo+&eIU8hWtxMe@*>UHIq`Xvr%J`=QATd-+@h8)`#df7oRN|B8~%3# zz7v9(<$6+f&3zY6&bIsZg_HS!O7ZozU%s3VsND5GT>D&o;k1tI)XPum$U45ixMO+! z11c|-(aSVPX z+uX>rM&75%drF!2>9RdTbEf7jOr_@kLC^x3pUp@U@fN`1y6w>d@SQF0I`d=Pb>+vp z>&}mJ*OR}-U2lH8yT1Gccm4Tm-3{a?x|=_kzs~)n{Ppf8=gZv<8EI0k1@JGZ+~Bf?xnzv~##;9ByVtxrDP0F zyVdJGn)hnnhf%pU#QbjCRTV zNoDD-RH21sTcvV6W#w*DuiG_W(Ah(3v*ERQGRIUv%{{|yX%C!J)p2|%^w(4(tvZJQ1ot%wh#=DE} zHg72R7L{A0a(!jxzO7#0(R^3)J&ek=A?Eip(xhAq;O42^WMr#8uQ$7L4$6HM!PC(FwHn|l3H^Y5Dfz^GgsV*U^#P0FH1M5AJ7bHCSjDW4Kkn1NYkoo0o**Ln~rYP z1zTmS?nb&R9zeR@c_Z`E2YDOXtyQ}>sa|i&a+~UwlfS z;WXR^Hp_R?z1k9Q$76g0z7C(>=i~`=bd-0;6N~!Srq+9#eVT5mub$so{pNu8)L+(F zIkR$QrBXSd@{Y>kl@DOAs;sRXR(WmZ@33c9kLN2>dk;SNRr5v~@0aiDobIOC9h1QF zJ#6oJZpXTrJDyS7*+$bH_16#brK#Kr!UX`Yhj3Gj7Za$8y1cj&*DB)D0~b^*nIC;H zTt83+u1R=d!D~^D5Q{>^y(PlX%Y_9Y{LV7my&4ghytw?}4T@gwh~(J9nO$5Xl~%f9 zVkRzTL~5)P5OlogVXw1d3_o#!!=5-6Q_Aw6_bl2OcKqG$`}sEU*!Q<0ih zJQ!yWHA;}W6|fN(D#$~fxTAz{Wl_C2iL5i$kBMV-1fnFIOlX>23MQ79|59hnSQ0x% z7z`I1^(eE3x-yIJ%nMB{+6=Bmr$6y77|W4-F)Tfc;9*t3k(hmFav4%NHDDcD-9We6<6p;X}GHmT1JZO$v~t`LR%Auv?;F=*!0}0 zBXLJjLbPC&DXO#peWXt}Tr4Q&47Y^h%mq{|Xd`9Av?lgx$WztOAYI6j^v4lz?r6Yz zM##f-${BMi6@!&AbhU1|(1)9{g$Utfa-5{>VFjzH0p z0LmQEX>ra}ID@Wcjux{c)5I*ajOxMH0Y(%_4q+sLmfTV0LJzXwT2A8S>~ZW2ONQfc zqYXYy0fG-yTz;@5LlV)Wzzi=1!6&UPcvz=T@P>Ow2VFd1K)+%Hf0J64F#H%Ia}_0t zxt%rDk{@H?AdY^r!GdYlz#&vjVS#cZkOSslMulrs0Y+p4N)MlpK#M1UaI+I+=RO?t zH+?D;#{#gRLQayVtdbCmwp>7Q{Q*!vLx4quSVs;71Nt~{E)*?PFc(N_L1EC~0KMqO zjekKf1R#i1A(cbWR2=}tq&uE}qDj^eM~}KzQCD!646u^sA%;Gw#{;K<{B>M9bcjma z`3PW`nLaof5}#;O46q3T14F5*WFjeBnk8AkJnDqfiDOyD@e?AeQve!_dx#%Ykin{9 zZ@+8D-#_+-j|?RiW0AQaVM5)BB2{3D!j>vT8G{&8XcKAzi*g|CVH6HdRA8<2nM{)6-iHoa7Bd0||?0b%0dJ-Vg^&8?0iOYySyAq&XRrK6>2-ov&bwzpF}+LuJf#_rcTh zop|(=DFe0d_QN>4;=|CLdIfa(Zie)(Lj(2Qf__1w$CZg*AiWDU7irdF647bZ7SQ}{ zj5QJ609i|z#b1D@Wwf-z0ek zQcWEA5?0l3`VcGI?@E-q8^fxapWakxNHwlhw<*hnr96vB&i0fEui^#8EcYi17s~>ZX~Im)yswc zYG)6XWmh3ncEGiv)%^4gMU7>3v08mbtwzdP{YZs=toe!NpD;tWLqc>aE?#mED9)+3(bWr|^Pk}_!HJ*!^}s-Fzi!0JuKs(J+V{GrJ_FY()u)x}MP;cTQ=ngI z9@jj9NmBJzTR`()Gu9;42FO~%+*ndQ!dIbuJwkm1 z!`YYaPHLQe7b?_^3bl7xsNVCCr%%(b8Nei=x~nai`5iWYTj_V<*0EA*3{}cwOU-(YIhad zL$jx5FHF+PPrsBg^S@%KNvbBq+J-mdp8I!Ue6e2RT^ROF*WrD~FFRXN)?k{hHIMi3 z24?zb8J{c*$JI~in0Iz=_>WKR}FE_eRDFKY0GIeGDu_it4yopbpGIj3_ji2gP9~sI^BL|in xXXPcgE6R=g%8fsi8~dDEiaDm-xRl2BMN-~F=6}ac|F6~D;rSzTM%J&H^S`mA4Uqr< delta 7334 zcmd6seQZ@{9mmhTxA*Nd()8BK>?YR@*={|gP*%ykTVz?M7Ru!YgSTSBq6qChNF?ORn?Zd0m znr6GD^h%T6Sbt)xW#v~pXVdZ|UddC$_||Xkd3;^{#g0C(rSxn(JEN58wrd$s?}~Tv zFF(^6Nju?cv3kR9OB~&}+Zs)EwzXfEyR~iU((A_h`b6!{kR_fNj#y$OW?N#>={c5| z`m_Fo&>=aEQn%4plPBAu3}WVlvJlE)yIQGRYw+Qyfw<%^@u_95OP?AuDYT zwygHXYFWc+jht3GG5|5Yoznd+w(b6+1S!oADk-golwCZ5Yba%&E~OEHvWb{O%mtK` zI_IerWD7r3Hlr4hVnn)AE(Ht9U((6>rml6aPvKO=&_j@O96-n9+6*~6DQ5xYEYjs% zLnRA|MZ{u2$teq!OHd2Q+39u6WagC1xlG4A@{dr$eJ%+*;>pLLre&|E2Kzm@)9<00 z`BanA)pSruC$W_10+gB=ju9%Cp%zf%N4hF=%GF>&Uf;wnzL_>G#AX`(DWqifD=A%u zl)EXVg;El_lpCm`hxiE53n(emog0(zBK%N!BWeLDMr37frfJSw37m@9_6+2_0E|Yu z%!b)PHy@;%@n37VhCGejL^wnrpxl&Y%9~LOxY-eN1DR9bFxmg(N>k&kNx+Y2ejaiT z0l3n1Tx)dBU9>Zgc24Pb`srj1aVzn0K-nn^m7hQ@VCOFHN;8=`<#sM}rCIeMY^U2_ zf|SDmFTXiM%63Zm0HvJMrF@1e))MQ8I{+o6+8K{Sw(vvc0BQj#Mr37fC);u3I2Cc= zFOW0mxF@Hz!;mvfIn9*wS6$9VD%nJACI$f|rz}(sp%#!c>~+j!W~Ft^v7e_C?%Ofg z5xZbTSg(6(+RaPNHmaFRHI4dZcn5{tP25B51eBVrV}#0Gs0Gyck*>;|ay3}cd)=Gf z>B4P$cMK;Y`e8*ptJJibAJDf_%^a$ERqvWFQOK8xBJmYKsmVA-sJsugfEqv2Rhbp0 z=KSZqv~w(qa}nW_u=5iD7o4Tbj6Jo5cA99XR&SiI)5$l8-NXZcvQri+_n;QAv&Gv} zlbKU)=Q3B}O$jv4U=()5!BbGvGOjK+rkWwDX{4ItdgDAuArBGXCLRWqnv`RN%12NO zsPQ9Rl{w{Vu%P$aQ-_jJ6OZCV#7l2LP42W(W8S+5spkDulh(WDQ3`pC*h}mKl$xYt zgv!TJ3#joUU6omBYWlly?`~{(z!EP4*h-ydzig(It10CVdcS<1DxM;KK>QF;QWDOm ztKdcWq4Gzl1*8~}6?X3~IWGTlir_rN@NAf|ulCH8ebn1TGqY*ttZwFK^l^}Qns^3K zX5x+!Di5I+FylwMDznns<&6)cT?XMpoKsS+H6KH4q?D^DrCHxY&r!wm#0$i)03{{n zbVcAr_@VMe)B;kB$VzLM-_DCAO4oPWF?V?*#5@g*COSLtSgq514DmV2c^~B*)jQ^8 zDtU!CLi`R;a-z)S>SD^ zcC&9J)y$%rklr_cq>$H$W5k~TrKZX;Lgk-P3#joUU6pyo)Lh~*#7Gfpei_1vh&{`p zCfe<($(fHkH&9Ij)f~~g<^+YDBu)|IfKn52j8OSE)BrN)Sq@1hbg;|02tiC3g$QkQ%h zTpC#!O1UHAjuVsuP>T7d_Gc-jj#6gm7n#^FRK$q{kpz?!Jp0dUkurr!K#CD~rEg6x zerfq>+wU7&JD`2JZJ45DCh29<M z)L{3OrfrDUl^#x2uZ&;C-_z{j(%)0n^Zm)Jv^H6(Pj={&efs28eNvUt0^0S-+D!G< z_;UWx?%~pFnd*i9B%8%C7b@LZt+H94d{=+l%lc$OpDebuwA=K_*YwFjOlF_|q2Bk2 Vqv^g|Zf|XEZ|%q}$&GDa@;^ZJfl2@X diff --git a/tests/test_hgvs_assemblymapper.py b/tests/test_hgvs_assemblymapper.py index 33d30013..fe172d5f 100644 --- a/tests/test_hgvs_assemblymapper.py +++ b/tests/test_hgvs_assemblymapper.py @@ -210,6 +210,47 @@ def test_map_of_ins_splice_region_preserved(self): self.assertEqual(str(var_p), hgvs_p) + def test_map_of_dup_splice_region_preserved(self): + hgvs_c = "NM_004119.2:c.1835_1837+3dup" + hgvs_p = "NP_004110.2:p.(Gly613_Lys614insIleGly)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + + hgvs_c = "NM_005228.4:c.2284-5_2290dup" + hgvs_p = "NP_005219.2:p.(Ala763_Tyr764insPheGlnGluAla)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + + hgvs_c = "NM_004456.4:c.2196-1_2196dup" + hgvs_p = "NP_004447.2:p.(Tyr733AspfsTer8)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + + hgvs_c = "NM_024529.4:c.130_131+1dup" + hgvs_p = "NP_078805.3:p.(Gly44dup)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + + hgvs_c = "NM_016222.3:c.27+2_27+5dup" + hgvs_p = "NP_057306.2:p.(Arg10ValfsTer20)" + + var_c = self.hp.parse_hgvs_variant(hgvs_c) + var_p = self.am.c_to_p(var_c) + + self.assertEqual(str(var_p), hgvs_p) + class Test_RefReplacement(unittest.TestCase): test_cases = [