plesna/tests/storage/test_fs_repository.py

116 lines
3.1 KiB
Python

import shutil
from pathlib import Path
import pytest
from plesna.storage.repository.fs_repository import FSRepository
# Sample data shipped with the test suite: the ``raw_datas`` directory located
# two levels above this file. The ``location`` fixture copies it for each test.
FIXTURE_DIR = Path(__file__).parent.parent.joinpath("raw_datas")
@pytest.fixture
def location(tmp_path):
    """Provide a temporary directory holding a ``schema`` copy of the fixture data.

    The content of ``FIXTURE_DIR`` is copied to ``tmp_path / "schema"`` and
    ``tmp_path`` itself is returned.
    """
    source_dir = FIXTURE_DIR
    # Stop right away if the sample data directory is missing.
    assert source_dir.exists()

    target_dir = tmp_path / "schema"
    shutil.copytree(src=source_dir.absolute(), dst=target_dir.absolute())
    return tmp_path
def test_init(location):
    """Check ``FSRepository.ls`` at the top level, inside ``schema`` and recursively.

    NOTE(review): the expected lists are compared in order and that order is
    not alphabetical; presumably it mirrors what ``ls`` returns -- confirm it
    is stable across filesystems.
    """
    repo = FSRepository("example", location, "example")

    top_level = repo.ls()
    assert top_level == ["schema"]

    schema_entries = repo.ls(dir="schema")
    assert schema_entries == ["username", "recovery", "salary"]

    everything = repo.ls(recursive=True)
    expected_tree = [
        "schema",
        "schema/username",
        "schema/recovery",
        "schema/salary",
        "schema/username/username.csv",
        "schema/recovery/2022.csv",
        "schema/recovery/2023.csv",
        "schema/recovery/2024.csv",
        "schema/salary/salary.pdf",
    ]
    assert everything == expected_tree
@pytest.fixture
def repository(location) -> FSRepository:
    """Build an ``FSRepository`` over the temporary ``location`` directory."""
    repo = FSRepository("example", location, "example")
    return repo
def test_list_schemas(repository):
    """``schemas()`` should return the single entry ``example-schema``."""
    schema_ids = repository.schemas()
    assert schema_ids == ["example-schema"]
def test_describe_schema(location, repository):
    """Check each attribute of the object returned for ``example-schema``."""
    described = repository.schema("example-schema")

    # Identity of the schema.
    assert described.name == "schema"
    assert described.id == "example-schema"

    # Where it lives, expressed as plain strings.
    expected_repo_id = str(location)
    assert described.repo_id == expected_repo_id
    expected_value = str(location / "schema")
    assert described.value == expected_value

    # Tables it contains, in the order the test expects them.
    expected_tables = [
        "example-schema-username",
        "example-schema-recovery",
        "example-schema-salary",
    ]
    assert described.tables == expected_tables
def test_list_tables_schema(repository):
    """The same table ids are expected from all three ways of listing tables."""
    expected_tables = [
        "example-schema-username",
        "example-schema-recovery",
        "example-schema-salary",
    ]

    # Through the schema object.
    via_schema = repository.schema("example-schema").tables
    assert via_schema == expected_tables

    # Through the repository, restricted to one schema.
    for_schema = repository.tables("example-schema")
    assert for_schema == expected_tables

    # Through the repository, without any argument.
    all_tables = repository.tables()
    assert all_tables == expected_tables
def test_describe_table(location, repository):
    """Check each attribute of the table ``example-schema-username``."""
    table = repository.table("example-schema-username")

    assert table.id == "example-schema-username"

    expected_repo_id = str(location)
    assert table.repo_id == expected_repo_id
    expected_schema_id = str(location / "schema")
    assert table.schema_id == expected_schema_id

    assert table.name == "username"
    expected_value = str(location / "schema" / "username")
    assert table.value == expected_value

    # A single file is expected, listed both by name and by full path.
    assert table.partitions == ["username.csv"]
    expected_file = table.value + "/username.csv"
    assert table.datas == [expected_file]
def test_describe_table_with_partitions(location, repository):
    """Check each attribute of ``example-schema-recovery``, which has three files."""
    table = repository.table("example-schema-recovery")

    assert table.id == "example-schema-recovery"

    expected_repo_id = str(location)
    assert table.repo_id == expected_repo_id
    expected_schema_id = str(location / "schema")
    assert table.schema_id == expected_schema_id

    assert table.name == "recovery"
    expected_value = str(location / "schema" / "recovery")
    assert table.value == expected_value

    expected_partitions = ["2022.csv", "2023.csv", "2024.csv"]
    assert table.partitions == expected_partitions

    # Each expected data path is the table value followed by the file name.
    expected_datas = [
        table.value + suffix for suffix in ("/2022.csv", "/2023.csv", "/2024.csv")
    ]
    assert table.datas == expected_datas