"""Tests for ``FSRepository`` run against a temporary copy of the fixture tree.

NOTE(review): every expected list below is compared in order; presumably
``FSRepository`` returns entries in a deterministic order -- confirm.
"""

import shutil
from pathlib import Path

import pytest

from plesna.storage.repository.fs_repository import FSRepository

FIXTURE_DIR = Path(__file__).parent.parent / Path("./raw_datas/")


@pytest.fixture
def location(tmp_path):
    """Copy the fixture directory into ``tmp_path / "schema"``.

    Returns ``tmp_path`` itself, i.e. the directory that contains the
    copied ``schema`` folder.
    """
    # Fail early if the fixture data is missing from the checkout.
    assert FIXTURE_DIR.exists()
    destination = (tmp_path / "schema").absolute()
    shutil.copytree(src=FIXTURE_DIR.absolute(), dst=destination)
    return tmp_path


def test_init(location):
    """Check ``ls`` at the root, inside ``schema``, and recursively."""
    repo = FSRepository("example", location, "example")

    root_entries = [
        "schema",
    ]
    assert repo.ls() == root_entries

    schema_entries = [
        "username",
        "recovery",
        "salary",
    ]
    assert repo.ls(dir="schema") == schema_entries

    # Directories are expected first, then the files they contain.
    all_entries = [
        "schema",
        "schema/username",
        "schema/recovery",
        "schema/salary",
        "schema/username/username.csv",
        "schema/recovery/2022.csv",
        "schema/recovery/2023.csv",
        "schema/recovery/2024.csv",
        "schema/salary/salary.pdf",
    ]
    assert repo.ls(recursive=True) == all_entries


@pytest.fixture
def repository(location) -> FSRepository:
    """Build an ``FSRepository`` on top of the ``location`` fixture."""
    return FSRepository("example", location, "example")


def test_list_schemas(repository):
    """Check that the single copied schema is listed by its id."""
    assert repository.schemas() == ["example-schema"]


def test_describe_schema(location, repository):
    """Check the attributes of the object returned by ``schema()``."""
    described = repository.schema("example-schema")

    assert described.name == "schema"
    assert described.id == "example-schema"
    assert described.repo_id == str(location)
    assert described.value == str(location / "schema")
    assert described.tables == ["username", "recovery", "salary"]


def test_list_tables_schema(repository):
    """Check that the three ways of listing tables give the same names."""
    expected_tables = ["username", "recovery", "salary"]

    assert repository.schema("example-schema").tables == expected_tables
    assert repository.tables("example-schema") == expected_tables
    assert repository.tables() == expected_tables


def test_describe_table(location, repository):
    """Check the attributes of a table holding a single data file."""
    schema_path = location / "schema"
    table = repository.table("example-schema-username")

    assert table.id == "example-schema-username"
    assert table.repo_id == str(location)
    assert table.schema_id == str(schema_path)
    assert table.name == "username"
    assert table.value == str(schema_path / "username")
    assert table.partitions == ["username.csv"]
    assert table.datas == [table.value + "/username.csv"]


def test_describe_table_with_partitions(location, repository):
    """Check the attributes of a table holding several data files."""
    schema_path = location / "schema"
    table = repository.table("example-schema-recovery")

    assert table.id == "example-schema-recovery"
    assert table.repo_id == str(location)
    assert table.schema_id == str(schema_path)
    assert table.name == "recovery"
    assert table.value == str(schema_path / "recovery")

    expected_partitions = [
        "2022.csv",
        "2023.csv",
        "2024.csv",
    ]
    assert table.partitions == expected_partitions

    # Each data path is the table path with the partition file appended.
    suffixes = ("/2022.csv", "/2023.csv", "/2024.csv")
    assert table.datas == [table.value + suffix for suffix in suffixes]