refactor: use repository id in dataplatform

This commit is contained in:
2025-01-05 14:55:46 +01:00
parent d256fbf169
commit ae61fd3c12
6 changed files with 80 additions and 40 deletions

View File

@@ -4,6 +4,7 @@ from pathlib import Path
import pytest
from plesna.dataplatform import DataPlateform
from plesna.graph.graph import Node
from plesna.models.flux import Flux
from plesna.models.transformation import Transformation
from plesna.storage.repository.fs_repository import FSRepository
@@ -24,38 +25,20 @@ def repository(tmp_path) -> FSRepository:
silver_path = Path(tmp_path) / "silver"
silver_path.mkdir()
return FSRepository("test", tmp_path, "test")
return FSRepository("test", "test", tmp_path)
def test_add_repository(
repository: FSRepository,
):
dp = DataPlateform()
dp.add_repository("test", repository)
dp.add_repository(repository)
assert dp.repositories == ["test"]
assert dp.repository("test") == repository
@pytest.fixture
def foo_flux(repository: FSRepository) -> Flux:
src = {"username": repository.table("test-raw-username")}
targets = {"username": repository.table("test-bronze-username")}
def foo(sources, targets):
return {"who": "foo"}
extra_kwrds = {}
flux = Flux(
sources=src,
targets=targets,
transformation=Transformation(function=foo, extra_kwrds=extra_kwrds),
)
return flux
@pytest.fixture
def copy_flux(repository: FSRepository) -> Flux:
raw_username = {"username": repository.table("test-raw-username")}
@@ -77,9 +60,30 @@ def copy_flux(repository: FSRepository) -> Flux:
return raw_brz_copy_username
@pytest.fixture
def foo_flux(repository: FSRepository) -> Flux:
    """Flux fixture wiring two raw tables to a single bronze table.

    Sources are the ``test-raw-username`` and ``test-raw-recovery`` tables of
    ``repository``; the only target is its ``test-bronze-foo`` table. The
    transformation function does not use its arguments and returns a
    constant dict.
    """

    def foo(sources, targets):
        # Neither argument is used; the result is always the same mapping.
        return {"who": "foo"}

    # Look the tables up in the same order as the flux declares them.
    raw_username = repository.table("test-raw-username")
    raw_recovery = repository.table("test-raw-recovery")
    bronze_foo = repository.table("test-bronze-foo")

    return Flux(
        sources={"username": raw_username, "recovery": raw_recovery},
        targets={"username_foo": bronze_foo},
        transformation=Transformation(function=foo, extra_kwrds={}),
    )
def test_add_flux(repository: FSRepository, copy_flux: Flux):
dataplatform = DataPlateform()
dataplatform.add_repository("test", repository)
dataplatform.add_repository(repository)
dataplatform.add_flux(name="copy_flux", flux=copy_flux)
assert dataplatform.fluxes == ["copy_flux"]
@@ -98,7 +102,7 @@ def dataplatform(
) -> DataPlateform:
dp = DataPlateform()
dp.add_repository("test", repository)
dp.add_repository(repository)
dp.add_flux("foo", foo_flux)
dp.add_flux("raw_brz_copy_username", copy_flux)
@@ -121,8 +125,16 @@ def test_listing_content(dataplatform: DataPlateform):
def test_content_from_graph(dataplatform: DataPlateform):
# assert dataplatform.graphset.model_dump() == {}
pass
assert dataplatform.graph.nodes == {
Node(name="test-raw-recovery", infos={}),
Node(name="test-raw-salary", infos={}),
Node(name="test-raw-username", infos={}),
}
# assert dataplatform.graphset.node_sets == {
# Node(name="test-raw-username", infos={}),
# Node(name="test-bronze-username", infos={}),
# }
def test_execute_flux(dataplatform: DataPlateform):

View File

@@ -20,7 +20,7 @@ def location(tmp_path):
def test_init(location):
repo = FSRepository("example", location, "example")
repo = FSRepository("example", "example", location)
assert repo.ls() == [
"schema",
]
@@ -45,7 +45,7 @@ def test_init(location):
@pytest.fixture
def repository(location) -> FSRepository:
return FSRepository("example", location, "example")
return FSRepository("example", "example", location)
def test_list_schemas(repository):