2025-01-03 14:54:18 +00:00
|
|
|
import shutil
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
from plesna.storage.repository.fs_repository import FSRepository
|
|
|
|
|
|
|
|
# Directory holding the raw sample data: "raw_datas", two levels above this file.
FIXTURE_DIR = Path(__file__).parent.parent / "raw_datas"
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def location(tmp_path):
    """Copy the raw fixture data into ``tmp_path / "schema"`` and return ``tmp_path``.

    An assertion fails if ``FIXTURE_DIR`` does not exist, before anything is
    copied.
    """
    source_dir = FIXTURE_DIR
    assert source_dir.exists()

    # The copy lands in a "schema" sub-directory of the temporary directory.
    target_dir = tmp_path / "schema"
    shutil.copytree(src=source_dir.absolute(), dst=target_dir.absolute())

    return tmp_path
|
2025-01-03 14:54:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_init(location):
    """Build an ``FSRepository`` on the fixture location and check ``ls()``.

    Covers the default listing, the listing of the ``schema`` directory and
    the recursive listing. Each result is compared as an ordered list.
    """
    fs_repo = FSRepository("example", "example", location)

    root_entries = ["schema"]
    assert fs_repo.ls() == root_entries

    schema_entries = ["username", "recovery", "salary"]
    assert fs_repo.ls(dir="schema") == schema_entries

    recursive_entries = [
        "schema",
        "schema/username",
        "schema/recovery",
        "schema/salary",
        "schema/username/username.csv",
        "schema/recovery/2022.csv",
        "schema/recovery/2023.csv",
        "schema/recovery/2024.csv",
        "schema/salary/salary.pdf",
    ]
    assert fs_repo.ls(recursive=True) == recursive_entries
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def repository(location) -> FSRepository:
    """Return an ``FSRepository`` built from ``"repo_id"``, ``"example"`` and ``location``."""
    fs_repo = FSRepository("repo_id", "example", location)
    return fs_repo
|
2025-01-03 14:54:18 +00:00
|
|
|
|
|
|
|
|
2025-01-05 10:27:52 +00:00
|
|
|
def test_list_schemas(repository):
    """``schemas()`` must return exactly the single id ``repo_id-schema``."""
    expected_schema_ids = ["repo_id-schema"]
    assert repository.schemas() == expected_schema_ids
|
2025-01-05 10:27:52 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_describe_schema(location, repository):
    """Look up ``repo_id-schema`` and check each attribute of the result.

    Checks ``name``, ``id``, ``repo_id``, ``value`` (the string form of
    ``location / "schema"``) and the ordered list of table ids.
    """
    described = repository.schema("repo_id-schema")

    assert described.name == "schema"
    assert described.id == "repo_id-schema"
    assert described.repo_id == "repo_id"
    assert described.value == str(location / "schema")

    expected_table_ids = [
        "repo_id-schema-username",
        "repo_id-schema-recovery",
        "repo_id-schema-salary",
    ]
    assert described.tables == expected_table_ids
|
2025-01-03 14:54:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_list_tables_schema(repository):
    """Check that three ways of listing tables return the same ordered ids.

    The schema object's ``tables`` attribute, ``tables("repo_id-schema")``
    and ``tables()`` with no argument are each compared to the same list.
    """
    expected_table_ids = [
        "repo_id-schema-username",
        "repo_id-schema-recovery",
        "repo_id-schema-salary",
    ]

    assert repository.schema("repo_id-schema").tables == expected_table_ids
    assert repository.tables("repo_id-schema") == expected_table_ids
    assert repository.tables() == expected_table_ids
|
2025-01-03 14:54:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_describe_table(location, repository):
    """Look up ``repo_id-schema-username`` and check each attribute of the result.

    This table holds a single file, so ``partitions`` and ``datas`` are each
    expected to contain one entry.
    """
    username = repository.table("repo_id-schema-username")

    # Identity attributes.
    assert username.id == "repo_id-schema-username"
    assert username.repo_id == "repo_id"
    assert username.schema_id == "repo_id-schema"
    assert username.name == "username"

    # Location on disk and the single file inside it.
    assert username.value == str(location / "schema" / "username")
    assert username.partitions == ["username.csv"]
    assert username.datas == [username.value + "/username.csv"]
|
|
|
|
|
|
|
|
|
|
|
|
def test_describe_table_with_partitions(location, repository):
    """Look up ``repo_id-schema-recovery`` and check each attribute of the result.

    This table holds three yearly CSV files, so ``partitions`` and ``datas``
    are each expected to list three entries in order.
    """
    recovery = repository.table("repo_id-schema-recovery")

    # Identity attributes.
    assert recovery.id == "repo_id-schema-recovery"
    assert recovery.repo_id == "repo_id"
    assert recovery.schema_id == "repo_id-schema"
    assert recovery.name == "recovery"

    # Location on disk.
    assert recovery.value == str(location / "schema" / "recovery")

    # One partition per file; ``datas`` prefixes each with the table path.
    expected_partitions = ["2022.csv", "2023.csv", "2024.csv"]
    assert recovery.partitions == expected_partitions
    assert recovery.datas == [
        recovery.value + "/2022.csv",
        recovery.value + "/2023.csv",
        recovery.value + "/2024.csv",
    ]
|