From f0315d09b9d1cf2c3b3babdcc667c7dffbeb604d Mon Sep 17 00:00:00 2001 From: Bertrand Benjamin Date: Sun, 5 Jan 2025 06:42:51 +0100 Subject: [PATCH] refact: reorganize raw_datas and adapt tests --- tests/dataplatform/test_dataplateform.py | 28 ++---- tests/raw_datas/recovery/2022.csv | 3 + tests/raw_datas/recovery/2023.csv | 4 + tests/raw_datas/recovery/2024.csv | 4 + tests/raw_datas/{ => salary}/salary.pdf | Bin .../username-password-recovery-code.xls | Bin 6656 -> 0 bytes .../username-password-recovery-code.xlsx | Bin 6477 -> 0 bytes tests/raw_datas/username.csv | 7 -- tests/raw_datas/username/username.csv | 6 ++ tests/storage/test_fs_repository.py | 86 +++++++++++------- 10 files changed, 77 insertions(+), 61 deletions(-) create mode 100644 tests/raw_datas/recovery/2022.csv create mode 100644 tests/raw_datas/recovery/2023.csv create mode 100644 tests/raw_datas/recovery/2024.csv rename tests/raw_datas/{ => salary}/salary.pdf (100%) delete mode 100644 tests/raw_datas/username-password-recovery-code.xls delete mode 100644 tests/raw_datas/username-password-recovery-code.xlsx delete mode 100644 tests/raw_datas/username.csv create mode 100644 tests/raw_datas/username/username.csv diff --git a/tests/dataplatform/test_dataplateform.py b/tests/dataplatform/test_dataplateform.py index d0ec044..081c487 100644 --- a/tests/dataplatform/test_dataplateform.py +++ b/tests/dataplatform/test_dataplateform.py @@ -13,26 +13,11 @@ FIXTURE_DIR = Path(__file__).parent.parent / Path("raw_datas") @pytest.fixture def repository(tmp_path) -> FSRepository: - raw_path = Path(tmp_path) / "raw" - raw_path.mkdir() - example_src = FIXTURE_DIR assert example_src.exists() - recovery_loc = raw_path / "recovery" - recovery_loc.mkdir() - username_loc = raw_path / "username" - username_loc.mkdir() - salary_loc = raw_path / "salary" - salary_loc.mkdir() - - for f in example_src.glob("*"): - if "recovery" in str(f): - shutil.copy(f, recovery_loc) - elif "salary" in str(f): - shutil.copy(f, salary_loc) - else: - shutil.copy(f, username_loc) + raw_path = Path(tmp_path) / "raw" + shutil.copytree(src=example_src.absolute(), dst=raw_path.absolute()) bronze_path = Path(tmp_path) / "bronze" bronze_path.mkdir() @@ -123,11 +108,16 @@ def dataplatform( def test_listing_content(dataplatform: DataPlateform): assert dataplatform.repository("test").schemas() == ["raw", "bronze", "silver"] assert dataplatform.repository("test").schema("raw").tables == [ - "recovery", "username", + "recovery", "salary", ] assert dataplatform.repository("test").table("raw", "username").partitions == ["username.csv"] + assert dataplatform.repository("test").table("raw", "recovery").partitions == [ + "2022.csv", + "2023.csv", + "2024.csv", + ] def test_execute_flux(dataplatform: DataPlateform): @@ -137,6 +127,6 @@ def test_execute_flux(dataplatform: DataPlateform): assert dataplatform.repository("test").schema("bronze").tables == [] meta = dataplatform.execute_flux("raw_brz_copy_username") - assert meta.data == {"src_size": 175, "tgt_size": 175} + assert meta.data == {"src_size": 283, "tgt_size": 283} assert dataplatform.repository("test").schema("bronze").tables == ["username"] diff --git a/tests/raw_datas/recovery/2022.csv b/tests/raw_datas/recovery/2022.csv new file mode 100644 index 0000000..f1cc2a9 --- /dev/null +++ b/tests/raw_datas/recovery/2022.csv @@ -0,0 +1,3 @@ +Identifier,One-time password +9012,12se74 +2070,04ap67 diff --git a/tests/raw_datas/recovery/2023.csv b/tests/raw_datas/recovery/2023.csv new file mode 100644 index 0000000..5bcd5a4 --- /dev/null +++ b/tests/raw_datas/recovery/2023.csv @@ -0,0 +1,4 @@ +Identifier,One-time password +9012,32ui83 +9346,14ju73 +5079,09ja61 diff --git a/tests/raw_datas/recovery/2024.csv b/tests/raw_datas/recovery/2024.csv new file mode 100644 index 0000000..61b5280 --- /dev/null +++ b/tests/raw_datas/recovery/2024.csv @@ -0,0 +1,4 @@ +Identifier,One-time password +9012,74iu23 +2070,12io89 +5079,85nc83 diff --git a/tests/raw_datas/salary.pdf b/tests/raw_datas/salary/salary.pdf similarity index 100% rename from tests/raw_datas/salary.pdf rename to tests/raw_datas/salary/salary.pdf diff --git a/tests/raw_datas/username-password-recovery-code.xls b/tests/raw_datas/username-password-recovery-code.xls deleted file mode 100644 index af0329b4cbfb66ffdcea1454a2de462bab523e18..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6656 zcmeHLYiL|W6h3z!>88)5c70oKQ=6u3Qg@Ry4@8^3ZAooN>jM$>W^P5TSoW@qwT}6l@X1ABrLf>aXfQQDhZG1l#rd&fM*;+r%bT1f{c_d*_@v zGvAprXJ*dcPd{ls@!}hoel2z78i~rOa-%fV$Su_8SNJB82N`M+cY+De%|3gzdZ|W>OFf2ZPMX_a@cz6eL2rE7Nav>t77G^d* zYtM_>=$}pJJbu*!XY&=?YyHpjo9B2GG}Hd)na};d5VQ!i7_ z3qcowE(To!x)gL7=yK2%pyi+|K{jXws0FkVbQNe7s1>vt)CO7uS_@hSY6o?I)`K{Q zRQP7S3-#`h@4>q_tXDmAzBZi)8WUK>hPb@5=p!EadiH;fhN&W zm$Cn?vRQ3#EXL+B1nIS?S@Jxji{&+yzN^n`HTs}5Aa);K%t(H=2GsT=`fSnXVmvK* zTl;*ak~HG-L+F1cz%VP`WDDT%DuIC0$?u71}qw-7H z21=zIvni)xD*t(Qa(Zhz{h8z(bvph3uly){YIAyFsrY%F@k6OzzGen_`wa56GsqjK zl?x(4?RmC}`%DCTZA!{~H%&$L-8|Kx@1;{QeP1v&Pv6U?;^jLvrXR(Lxk8imE9GFp z4Lm3Bc1${j!aaU4+7Y-J|86%p)RFN=-6yb_SKxQtN`OT69kJv)xs^wHX;|%ODDmj# zo@d}N2y3ZJ7L<78zlQ`dArDzd5fjI-m6%YWphfJ-8Q%OZhGAn_~iiQnaj#RHrN zJCe#9L;G;A>>PEyVsHo-#P<)lLAn#ZNp}|9-fmQak^YoQhMmlQHwVdP)k5;S*U7mB z47c0y&{Qb80b0ic_fV=Al~lJg*~3b1yff9Cg8ra$FmT|g9Z&%aEBqqb2YqkU_h90L zzuznPUSAr*u9WBZ^`MfO=uY*ep}r+>vg0@kb{Q?|yWWAUSLmi%x_jbaZx<^0iT_fn^f z|8cEy7e?fro=GG@sDy=gI7m!0V@qqxKCFMizQMi69`^H&*M5T^>lZ#S;4skfp2aK{&iJH9AwTvJnjQCjf7 zXB3m;@36F6?N+6KO2@deDlP$JC6%)PCfOA--oL9KJwLU)PE$gpRvjN7|j#!RqY z#^tM*A(s9mI~jf(Fm@Uaeo#QeMZgi!a8Yo?GF%KCQ4ALc$MFr<01lxXa`V6;)|KOA zlC(k|K(MOc<+CR$ZN$Tt3N?wxfaaJMu#%e78jg6}35Fvc^IXFbk3J?x>yw)M5|81Z zlw0je8}aCSqe(nw$A%*w4^6`nj~TP!h{xR7aKxhz$!D12GP;|e4-S0`Rf@#a5m`o%Ecq~`D(B?fDfn#~)Ic+Be! zM?7Zuh9jPRnyd9E*Eor*$gE#n@owK^`o$HmCF~bhyp^Xp;$3x`BOXo$#fu|Ws(5jo zAJ^}D8RC&ux*l~LS0zS`ZU#(xi9|~&@Jl3CYEyY*$?PI9H6cK+wuoy`P( z!5=Hy+wRS{IlI5NW1lnP=5nrXsm5q zw>|rtUVbVe>sG({C0FP^5FO5T8qWPa0HPNMLAVp;77%yyD2V%b5=0}vJ!$@XvYrMF zfMomo3@TG+1!X{==E3?@`_@Nx{O}ba&nPq5PmIXtAAkP*`pcT0c@Fls|Mu!u$l`Uf z+aTkv<=`Yn9sAPOA-s74u-G8BbIxoZ0Fh^%bKW#Ld6OFykn>dgb3ZcX(i_&NP*6Lw z{FC2*)c(2{JQFdTlexC!W_*1LeP16ic=M zIgc3G0~;oca1Kx6bB(zFSZ34pYAEdt6)oWA#GOsIX?1uD8D#UER;f(zkWm|4{Q@~Rd;0#y a{5RC#hi079Yp(vU{r+}Nz+vZe!T$p~UWWky diff --git a/tests/raw_datas/username-password-recovery-code.xlsx b/tests/raw_datas/username-password-recovery-code.xlsx deleted file mode 100644 index 1e2d2dd6e8c5122c14b6e6cc8aecfd6faa09b759..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6477 zcmaJ_1zZ&WwqJ4=knRxakcOqZyF)@sSQ-{sx}{4%LSPXP0TJnLK}u3eK)Mx>20;mV zi~swsT;IE%`E1PWoU=3MeCPW+=c}cPf=UDcfk1#J8yq9REu%nu_i^GfcZWE^xV)k6 zj*p;FM@}DSC(fHAWjfLhUEFxBUx**=*X#%sk;+tDTM97N%R$u-#} zF_ql6)=RXC+tI51t)cXC$FB25mq>X}d5iDdtp{o%Yn*zwdfr{l7Fd;MTX)Z*={!>lr9`~55J?ktIITh zFQA@AVO%1W>&F0Xeb6?1svMgc@gk&&OQBpZQ4`i@&x8Cv&^vSS>D?w$7+Tdwe-SF*v4 z2?{|(3mTWW`G?(_(1}zgxKpz4dCJLWEz%Q)yMb|$K{-H{2GZFdO%sc=+L$aQ{-QS0 zTxpYBHy}dWT&n{i02GDtHxLp210oMQh%%OARimw)_k4E;HzLIUrb1` zpvEFSJgB!RmbdiQWr>TW3daMp`eVi@y|G_=P(x@Tn-=Zi$}A^sUIc71nP;$#a!1`L z@6_MOo=6A^e_n`g)>@kEB$7FliJWIo2n}L>JH-3WP4|d7Vs?@aolz*anL`f3I`$$U z!5mp>`U``kvY7m*lrYtrHQHl%AfA6%S;z@qc2H^m*}X#A5gI}f3|z=p;ugR-b`!f8 z_B3G2@*KWcJ4NX(-Ua}Cd7HX2GlsmXJYUK2+ALumZ)E?)V8h8860Tdk_|^A;C7kaO zMgHdv4}t`ie=OW>pI63ulYWdJL?Qoa1w~IO4>w;h!S+1Vy6{pgcEkps{S-T>(9khp z+hm!^VEwOOMj|MzF@|tPS&=P$fb}t^FtjtKoRsSMf?3{>o1&u83gW$0Tm>u7k zhcY01U(*^m`ilQAyZ9yL|xkN=M&brtAYNQs%`7k~h;G7^HMA zgkzhiQ`s{r)zuDumJ*aTP#P?p0qI-d5_>SQKS`J05`53bDs%UE_gc99s?+cbbP)aC zX^rg_;SIb_D-T1d5%3xV{S98&|G*38;p+r}-2iLX&=i`*O%%9PQF-}RW*qFy{9;=K zGCQbORt&bE4Z}h&Jx7_*pgjNT{RHE2!9s!BECH)#J?~J#e%s27X!fkAsTj0JF&VVW*NtsM#gpVdqlx3=(B)yJ%V|lzof=|Wm*Y0`cijo8Wti=l z^EKv+NuQPGo~QJiXaw_oXaPT|0w%wvo6_O1<(6^|HhYldh!M9Bfj3IZk-|Ub9~vc& zg1Ps@#>t6Hg7d_~c@;1jG1@C0JtuTw*?Xb6ZB*XDunKOVDK*k=o_cfNm2T=jQa15> z_1&}<9JO4X5Bm={f<~sH_d+*$m&(C2cPwOu_MNBM>QHp_TXR^MAAJk0?|9E-4KRfn zC`k(-VF6clee(UgxX!pqd2v7dG0Ey zTf!?u#kZE9DLAy#+K^!Lznd2ipYTEv*M@cN0~aR$4VzX?7tZUN%oo7{XHh?X3I|PF zUGgq@EuI~xPvP#WT_=uv9gya|C~IFVt1jtb-OKA+7_qvXylP%3QP$J3YQce_mG0WRWY_xhZuW5KSr`W68&IN9*4Al$-l@k$yzF{*hHmu zc^&G>YgzeoF!d7o2F}&XHWsM}IKLtI8=T30FS#%~2*d-%_3PsoWwsd(y37d?EsQ(> z8GG>8QP*E8Ig@_#4*_GqJurt7p4nwZGrVDNX8v*BRA6nyG$XBLoh_x|Ua_7J-Qeo9btdg;BbNlSH&U8$N@}!iTBRuU{hy+ros{l# zU#Jk``G)BvSe8kd1SIyH$&|$b{eY1Z)q>Q1lf!G4GE%tYnOz4q>p>Xh2BtAuA?+9v zN4?%r`#3s@L^<4Zb{hZBS#Q+6p$~;&~m#o<9JwhHW);Hk^Gb6Z6`ozS^5>*?GX}FOYs9 zx?n)rJkPR$OG02LW__~B9l>!o-T%$0=VG}r9^<*>X+`Xi5^nXgz4fppwhC81;8)8X z+_tbgl`?8-a;`BM@aO{q9zOhC)$VyY4^Dh6LVQ!YXG|~sV+=E+&7K@cJvNR!$cBKP zbezNQsLPe(p-7Hyi}Bl|!wHz^Mp1K3XyB_@SaMhp`^R?h3ihM~(}Yve!7%~)3AppP zzzOIo$6%m1Lea556K#9~wMwdX&(M$texyoB;ILB8p47pq^d%c@E90p>w%`_7sK@2o zpRcPA>`aAgq-qt%=8?x{tt=ng4RCSbnc`uslZMLevnyFEysFz};LSDMO)v;Y#QwUQ ziT3i_>3n!?oen2EUYYPSlISUGHo~J4MT0X$aQHjNZ1F9MP`MyYwvCUS`E}Prh`7Nn zH*pcChapbqjK<(WJ*0pdq0u)ra(pZfs|o8*U7tHD@nJswNg zY-y*4pVK`0jEc*nhO?)X<^WDZC~WUq-(5>%=!x<1W$Bd6WS?@M4T{Ky%wcYqSSB>Z zI!I`L>@bN***AyhXGrzxgx=Nl0Zg-}2$*cSH@}>J{KSUkx<*WxWtxZ`H2FR~&)eo{ zIE1}jo`r4o^}cs(-Af9;VfK}3i4)3etQ+>-qy2#-ga!Zz5&eyQ5kKAxhA=xzcZjvF zhr7Lt?XR99Elpixof|LW>^a>L^{R(V5hbJaDiq9tM{ozol1%3oJxHG3HQMlAoy$h> zjgfqUq-1+_?(X){ghoHmPawe|E!#egrmmdtaDwv$?3YgR;Zr?|iqE5ew#Nri;B zowQZrXP@Ak;!ohoGX_#IS;{X7_v3$}=pPfbqmq<>!TNrHYozO)q)=bJP5JNiEB4$Z zDdZW~LQ63U;+9`mdH1Z8vxla#58beP^yj;-M+Jq#a0N&>3B5VOGXVIP+w-F5PK=>7 z#8yVpTG!2!GLy{FmRWL}lMN>L=XVIJqXPi3=t7_f|3rV&SS{0z)@OOT?c;?ck+z34 zbir~Zgjjst696eB~^3|DM!qi=ANb+_K?eezkfD>_j3u9%ayanE}5Bqj(dKy!k zKy_;Zq;Dib(sy_F>{#D+S_STQtNB{sY@NebCLX`KPq5q#jKlzjoM@VqzNp)9(TTt-NM}>=OF6@ikOsZxv`bIIxZA?~yMcsox-F&$O zhF=Et;tMJ2KxOZ{Cu*vk46R5H6!?22sbFp%tE-M7eX-iH28c^Vxtr>H^ZvAs{v2+! z6Hnk~Yw+?@FluYKy0as{i}F1xKMB6o2Xra4UVSQR`2j^jtpwPRNKD^FNMO_8(tauq z`5kU0b^LVN&wG6&6AcNpSE++r85xAlO`qLeADy5Nw-SVN+j{t`(+4W4?d1~Rj(~&Vdx#`_`0Ln@k2u2`YNh24b%k+RLERxYZMCz`l4ck;UeH+uO<+`~ z!jMx4qr>SsK|UYFl0#F~eGYYh%DG_ZW9){;hKrz+mc_c6fQoGk;8efD7lxQe1R*5q zte$Bp4U=@wpLWnrGgeSy3k!$PAFnbIM#l7Z);QlWOkGC4S8$h*iJ4Nai^cO1N^DD? zXf5gV6MZW2T4Mf(mj{@e3<=O;WlJYv7r%?2np2p zZ3-WFVrUrJk5&ZWae)#Y%S+0ZG^^@YRtp>A^%{P_MIW9E8mqGG<4UYyde5WkDsmps zS6VKooJa)vJmgT~iD9-U8|h>)Wn@(?Vt;$F12;oI$>!LxKGTmafQoePO}Sir!$yvJ zs<7;sYS<-}I}p57^d0Snm)fOEVY&$5#{RV;c(*HJ>FRn@knALN$6x5ZAbxj3A&6L7K5*D5cqbK$FGJ!BUiPd) zbgXi96{y{8Pxl-IxY8Sy+{nLAkTqQl1S8!fj#W^m^8%5U=ZNI}cLnQ4eE1DzPnZYP z`B&~fRAF{<6Gf~n6oF>u^ng#tYvY7O_0EzO>ycp4KJ~ajfBMk9gs6KG8WZAIvZZ)4 z_zq26Y^(qdY#e3|=vASR^1J?J3+nI}8|dsUUegch(W3j2;@D{%g^P$B3O{{V#!eTu z;-r^{>g2fTfD=A+X6~#arHJTFYR^Ov{-6sG{&Bd?f2E3Jy|n**5lZXtoF8O6M|yK# zD^q}wbi`F3B3^&LFB4g)iwDHT!(7kT6#_H8xvw_;egvc8wVkoinQN~;Ly=)|#${K0 zEhkKGrAaX1^K|#w={`BLR@!*Ped2V(!yywJ-m2q6oymUW=jjT#{WM=wP{D#S+wMwc zuW+~c$rJs4ctGAUXOvte>Ct|oR1vR`;>jh)qM(m(i$x3YWUPxHF~m5jUTS|izX=d2 zpmk5u4g#WOP181+uX>flxhBuo`)WHVWz^NJvs+~JvMj{Fv@TFOB1la3#<^_C>9VNI zmA`VGvKgAXU?>r@#a*=E?~{1R!U2zRDi<^`m6(Pi6kl;a=5#!#h|9azJG>a539(}p zh57o23o>fz^2yT*gSx(8(q=Z`WE>DnNgWTvS((1!Uxiqq?!rcAWMyG;qfz?UKj$|D z3WDuCQ4VU2nx`)&y5ZM`!26%2#DG!lKV~Sa`UCqEMGvld);B#?|7o(#(YyVmQ3^v7y^g#=r=V( zLM8(I)}3!Ftv9;!f7*Xnq5q6?TWq`0Wq%tB;-3+5ZmYC^I^UL3ZXSu>=8Vwp5YB(9 zbAJZ7J-ps1F28LE(Nx{EO#jwj{&cUi)quf#3EWk FSRepository: def test_list_schema(location, repository): - assert repository.schemas() == ["username", "salary"] - assert repository.schema("username").name == "username" - assert repository.schema("username").id == str(location / "username") - assert repository.schema("username").repo_id == str(location) - assert repository.schema("username").value == str(location / "username") + assert repository.schemas() == ["schema"] + assert repository.schema("schema").name == "schema" + assert repository.schema("schema").id == str(location / "schema") + assert repository.schema("schema").repo_id == str(location) + assert repository.schema("schema").value == str(location / "schema") + assert repository.schema("schema").tables == ["username", "recovery", "salary"] def test_list_tables_schema(repository): - assert repository.schema("username").tables == [ - "username.csv", - "username-password-recovery-code.xlsx", - "username-password-recovery-code.xls", - ] - assert repository.schema("salary").tables == ["salary.pdf"] + assert repository.schema("schema").tables == ["username", "recovery", "salary"] def test_describe_table(location, repository): - table = repository.table("username", "username.csv") - assert table.id == str(location / "username" / "username.csv") + table = repository.table("schema", "username") + + assert table.id == str(location / "schema" / "username") assert table.repo_id == str(location) - assert table.schema_id == str(location / "username") - assert table.name == "username.csv" - assert table.value == str(location / "username" / "username.csv") - assert table.partitions == [] + assert table.schema_id == str(location / "schema") + assert table.name == "username" + assert table.value == str(location / "schema" / "username") + assert table.partitions == ["username.csv"] + assert table.datas == [table.value + "/username.csv"] + + +def test_describe_table_with_partitions(location, repository): + table = repository.table("schema", "recovery") + + assert table.id == str(location / "schema" / "recovery") + assert table.repo_id == str(location) + assert table.schema_id == str(location / "schema") + assert table.name == "recovery" + assert table.value == str(location / "schema" / "recovery") + assert table.partitions == [ + "2022.csv", + "2023.csv", + "2024.csv", + ] + assert table.datas == [ + table.value + "/2022.csv", + table.value + "/2023.csv", + table.value + "/2024.csv", + ]