plesna/tests/dataplatform/test_dataplateform.py

75 lines
1.7 KiB
Python
Raw Normal View History

2025-01-03 15:01:01 +00:00
import shutil
2025-01-03 07:59:54 +00:00
from pathlib import Path
import pytest
from plesna.dataplatform import DataPlateform
2025-01-03 15:01:01 +00:00
from plesna.storage.repository.fs_repository import FSRepository
2025-01-03 07:59:54 +00:00
2025-01-03 15:01:01 +00:00
# Sample input files live in the "raw_datas" directory, two levels up from this file.
FIXTURE_DIR = Path(__file__).parent.parent.joinpath("raw_datas")
2025-01-03 07:59:54 +00:00
@pytest.fixture
def repository(tmp_path) -> FSRepository:
    """Build a file-system repository rooted at ``tmp_path`` for the tests.

    Creates ``raw``, ``bronze`` and ``silver`` directories under
    ``tmp_path``. Inside ``raw`` it creates ``recovery``, ``username`` and
    ``salary`` directories and copies each entry of ``FIXTURE_DIR`` into
    exactly one of them, chosen from the entry's file name.

    Returns:
        ``FSRepository("test", tmp_path, "test")``.
    """
    raw_path = Path(tmp_path) / "raw"
    raw_path.mkdir()

    example_src = FIXTURE_DIR
    assert example_src.exists()

    recovery_loc = raw_path / "recovery"
    recovery_loc.mkdir()
    username_loc = raw_path / "username"
    username_loc.mkdir()
    salary_loc = raw_path / "salary"
    salary_loc.mkdir()

    for f in example_src.glob("*"):
        # Match on the file name only: matching on the full path would
        # misroute every file if a parent directory happened to contain
        # "recovery" or "salary".
        if "recovery" in f.name:
            shutil.copy(f, recovery_loc)
        # ``elif`` keeps the three destinations mutually exclusive; with a
        # second plain ``if`` the ``else`` branch also fired for recovery
        # files and duplicated them into ``username``.
        elif "salary" in f.name:
            shutil.copy(f, salary_loc)
        else:
            shutil.copy(f, username_loc)

    bronze_path = Path(tmp_path) / "bronze"
    bronze_path.mkdir()

    silver_path = Path(tmp_path) / "silver"
    silver_path.mkdir()

    return FSRepository("test", tmp_path, "test")
2025-01-03 07:59:54 +00:00
2025-01-03 15:01:01 +00:00
def test_add_repository(repository: FSRepository):
    """A repository added under a name is listed and can be fetched back by that name."""
    platform = DataPlateform()
    platform.add_repository("test", repository)

    registered_names = platform.repositories
    assert registered_names == ["test"]

    fetched = platform.repository("test")
    assert fetched == repository
@pytest.fixture
def dataplatform(repository: FSRepository) -> DataPlateform:
    """Provide a ``DataPlateform`` with the ``repository`` fixture added under the name ``"test"``."""
    platform = DataPlateform()
    platform.add_repository("test", repository)
    return platform
def test_listing_content(dataplatform: DataPlateform):
    """The "test" repository lists its schemas, and the "raw" schema lists its tables."""
    expected_schemas = ["raw", "bronze", "silver"]
    expected_raw_tables = ["recovery", "username", "salary"]

    assert dataplatform.repository("test").schemas() == expected_schemas
    assert dataplatform.repository("test").schema("raw").tables == expected_raw_tables
2025-01-03 07:59:54 +00:00
2025-01-03 15:01:01 +00:00
def test_add_flux(dataplatform: DataPlateform):
    """Placeholder test: requests the ``dataplatform`` fixture but asserts nothing yet."""
    # TODO: exercise dataplatform.add_flux() here.