refactor: reorganize raw_datas and adapt tests

2025-01-05 06:42:51 +01:00
parent 86f0dcc49e
commit f0315d09b9
10 changed files with 77 additions and 61 deletions
--- a/tests/storage/test_fs_repository.py
+++ b/tests/storage/test_fs_repository.py
@@ -3,7 +3,6 @@ from pathlib import Path

 import pytest

-from plesna.models.storage import Schema
 from plesna.storage.repository.fs_repository import FSRepository

 FIXTURE_DIR = Path(__file__).parent.parent / Path("./raw_datas/")
@@ -11,37 +10,36 @@ FIXTURE_DIR = Path(__file__).parent.parent / Path("./raw_datas/")

 @pytest.fixture
 def location(tmp_path):
-    loc = tmp_path
-    username_loc = loc / "username"
-    username_loc.mkdir()
-    salary_loc = loc / "salary"
-    salary_loc.mkdir()
+    schema = tmp_path / "schema"
    example_src = FIXTURE_DIR
    assert example_src.exists()

-    for f in example_src.glob("*"):
-        if "username" in str(f):
-            shutil.copy(f, username_loc)
-        else:
-            shutil.copy(f, salary_loc)
+    shutil.copytree(src=example_src.absolute(), dst=schema.absolute())

-    return loc
+    return tmp_path


 def test_init(location):
    repo = FSRepository("example", location, "example")
    assert repo.ls() == [
+        "schema",
+    ]
+    assert repo.ls(dir="schema") == [
        "username",
+        "recovery",
        "salary",
    ]

    assert repo.ls(recursive=True) == [
-        "username",
-        "salary",
-        "username/username.csv",
-        "username/username-password-recovery-code.xlsx",
-        "username/username-password-recovery-code.xls",
-        "salary/salary.pdf",
+        "schema",
+        "schema/username",
+        "schema/recovery",
+        "schema/salary",
+        "schema/username/username.csv",
+        "schema/recovery/2022.csv",
+        "schema/recovery/2023.csv",
+        "schema/recovery/2024.csv",
+        "schema/salary/salary.pdf",
    ]


@@ -51,27 +49,45 @@ def repository(location) -> FSRepository:


 def test_list_schema(location, repository):
-    assert repository.schemas() == ["username", "salary"]
-    assert repository.schema("username").name == "username"
-    assert repository.schema("username").id == str(location / "username")
-    assert repository.schema("username").repo_id == str(location)
-    assert repository.schema("username").value == str(location / "username")
+    assert repository.schemas() == ["schema"]
+    assert repository.schema("schema").name == "schema"
+    assert repository.schema("schema").id == str(location / "schema")
+    assert repository.schema("schema").repo_id == str(location)
+    assert repository.schema("schema").value == str(location / "schema")
+    assert repository.schema("schema").tables == ["username", "recovery", "salary"]


 def test_list_tables_schema(repository):
-    assert repository.schema("username").tables == [
-        "username.csv",
-        "username-password-recovery-code.xlsx",
-        "username-password-recovery-code.xls",
-    ]
-    assert repository.schema("salary").tables == ["salary.pdf"]
+    assert repository.schema("schema").tables == ["username", "recovery", "salary"]


 def test_describe_table(location, repository):
-    table = repository.table("username", "username.csv")
-    assert table.id == str(location / "username" / "username.csv")
+    table = repository.table("schema", "username")
+
+    assert table.id == str(location / "schema" / "username")
    assert table.repo_id == str(location)
-    assert table.schema_id == str(location / "username")
-    assert table.name == "username.csv"
-    assert table.value == str(location / "username" / "username.csv")
-    assert table.partitions == []
+    assert table.schema_id == str(location / "schema")
+    assert table.name == "username"
+    assert table.value == str(location / "schema" / "username")
+    assert table.partitions == ["username.csv"]
+    assert table.datas == [table.value + "/username.csv"]
+
+
+def test_describe_table_with_partitions(location, repository):
+    table = repository.table("schema", "recovery")
+
+    assert table.id == str(location / "schema" / "recovery")
+    assert table.repo_id == str(location)
+    assert table.schema_id == str(location / "schema")
+    assert table.name == "recovery"
+    assert table.value == str(location / "schema" / "recovery")
+    assert table.partitions == [
+        "2022.csv",
+        "2023.csv",
+        "2024.csv",
+    ]
+    assert table.datas == [
+        table.value + "/2022.csv",
+        table.value + "/2023.csv",
+        table.value + "/2024.csv",
+    ]