2025-01-03 15:01:01 +00:00
|
|
|
import shutil
|
2025-01-03 07:59:54 +00:00
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
from plesna.dataplatform import DataPlateform
|
2025-01-04 12:51:24 +00:00
|
|
|
from plesna.models.flux import Flux
|
|
|
|
from plesna.models.transformation import Transformation
|
2025-01-03 15:01:01 +00:00
|
|
|
from plesna.storage.repository.fs_repository import FSRepository
|
2025-01-03 07:59:54 +00:00
|
|
|
|
2025-01-03 15:01:01 +00:00
|
|
|
# Directory of example files used to populate the test repository: a
# ``raw_datas`` directory that is a sibling of this file's parent directory.
FIXTURE_DIR = Path(__file__).parent.parent / Path("raw_datas")
|
2025-01-03 07:59:54 +00:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def repository(tmp_path) -> FSRepository:
    """Build an ``FSRepository`` on top of a freshly populated temp directory.

    The temporary directory receives three sub-directories, created in this
    order: ``raw``, ``bronze`` and ``silver``. Inside ``raw`` the directories
    ``recovery``, ``username`` and ``salary`` are created, and every entry
    found directly under ``FIXTURE_DIR`` is copied into exactly one of them:

    * name contains ``"recovery"`` -> ``raw/recovery``
    * name contains ``"salary"``   -> ``raw/salary``
    * anything else                -> ``raw/username``

    Args:
        tmp_path: pytest's per-test temporary directory.

    Returns:
        ``FSRepository("test", tmp_path, "test")``.
    """
    root = Path(tmp_path)

    raw_path = root / "raw"
    raw_path.mkdir()

    example_src = FIXTURE_DIR
    assert example_src.exists()

    recovery_loc = raw_path / "recovery"
    recovery_loc.mkdir()
    username_loc = raw_path / "username"
    username_loc.mkdir()
    salary_loc = raw_path / "salary"
    salary_loc.mkdir()

    for f in example_src.glob("*"):
        # Classify on the file name only, so that a parent directory whose
        # name happens to contain "recovery" or "salary" cannot misroute files.
        # The chain is exclusive (if/elif/else): a "recovery" file must not
        # fall through to the default ``username`` location as well.
        if "recovery" in f.name:
            destination = recovery_loc
        elif "salary" in f.name:
            destination = salary_loc
        else:
            destination = username_loc
        shutil.copy(f, destination)

    # Empty schemas; created after ``raw`` to keep the original creation order.
    (root / "bronze").mkdir()
    (root / "silver").mkdir()

    return FSRepository("test", tmp_path, "test")
|
2025-01-03 07:59:54 +00:00
|
|
|
|
2025-01-03 15:01:01 +00:00
|
|
|
|
|
|
|
def test_add_repository(
    repository: FSRepository,
):
    """A repository added under a name is listed and can be fetched back."""
    repo_name = "test"
    platform = DataPlateform()

    platform.add_repository(repo_name, repository)

    # The platform exposes exactly the one registered name...
    assert platform.repositories == [repo_name]
    # ...and returns the same repository when asked for it.
    assert platform.repository(repo_name) == repository
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def foo_flux(repository: FSRepository) -> Flux:
    """Flux from ``raw.username`` to ``bronze.username`` using a stub transformation.

    The transformation ignores its arguments and returns ``{"who": "foo"}``.
    """

    def foo(sources, targets):
        # Stub: constant payload, inputs are not used.
        return {"who": "foo"}

    return Flux(
        sources={"username": repository.table("raw", "username")},
        targets={"username": repository.table("bronze", "username")},
        transformation=Transformation(function=foo, extra_kwrds={}),
    )
|
2025-01-03 15:01:01 +00:00
|
|
|
|
2025-01-03 07:59:54 +00:00
|
|
|
|
2025-01-04 12:51:24 +00:00
|
|
|
@pytest.fixture
def copy_flux(repository: FSRepository) -> Flux:
    """Flux from ``raw.username`` to ``bronze.username`` with a no-op transformation."""

    def copy(sources, targets):
        # Placeholder transformation: does nothing and yields None.
        return None

    return Flux(
        sources={"username": repository.table("raw", "username")},
        targets={"username": repository.table("bronze", "username")},
        transformation=Transformation(function=copy, extra_kwrds={}),
    )
|
|
|
|
|
|
|
|
|
2025-01-04 14:30:32 +00:00
|
|
|
def test_add_flux(repository: FSRepository, copy_flux: Flux):
    """Fluxes are listed in insertion order and retrievable by their name."""
    platform = DataPlateform()
    platform.add_repository("test", repository)

    # First registration.
    platform.add_flux(name="copy_flux", flux=copy_flux)
    assert platform.fluxes == ["copy_flux"]

    # Registering the same flux object under a second name.
    platform.add_flux(name="copy_flux_bis", flux=copy_flux)
    assert platform.fluxes == ["copy_flux", "copy_flux_bis"]

    # Both names resolve to the flux that was registered.
    for flux_name in ("copy_flux", "copy_flux_bis"):
        assert platform.flux(flux_name) == copy_flux
|
2025-01-04 14:30:32 +00:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def dataplatform(
    repository: FSRepository,
    foo_flux: Flux,
    copy_flux: Flux,
) -> DataPlateform:
    """Data platform with the ``test`` repository and the ``foo`` and ``copy`` fluxes."""
    platform = DataPlateform()
    platform.add_repository("test", repository)

    # Register the fluxes in a fixed order: "foo" first, then "copy".
    registrations = (("foo", foo_flux), ("copy", copy_flux))
    for flux_name, flux in registrations:
        platform.add_flux(flux_name, flux)

    return platform
|
|
|
|
|
|
|
|
|
|
|
|
def test_listing_content(dataplatform: DataPlateform):
    """The ``test`` repository lists its schemas and the tables of ``raw``."""
    expected_schemas = ["raw", "bronze", "silver"]
    expected_raw_tables = ["recovery", "username", "salary"]

    listed_schemas = dataplatform.repository("test").schemas()
    assert listed_schemas == expected_schemas

    listed_tables = dataplatform.repository("test").schema("raw").tables
    assert listed_tables == expected_raw_tables
|
|
|
|
|
|
|
|
|
|
|
|
def test_execute_flux(dataplatform: DataPlateform):
    """Executing the ``foo`` flux yields a result whose ``data`` is ``{"who": "foo"}``."""
    expected_data = {"who": "foo"}

    result = dataplatform.execute_flux("foo")

    assert result.data == expected_data
|