Feat: flux takes list of tables for sources and targets

This commit is contained in:
Bertrand Benjamin 2025-01-05 15:31:40 +01:00
parent 8a43a93cda
commit 09783f9c1e
4 changed files with 45 additions and 33 deletions

View File

@ -3,6 +3,6 @@ from plesna.models.flux import Flux, FluxMetaData
def consume_flux(flux: Flux) -> FluxMetaData:
metadata = flux.transformation.function(
sources=flux.sources, targets=flux.targets, **flux.transformation.extra_kwrds
sources=flux.sources_dict, targets=flux.targets_dict, **flux.transformation.extra_kwrds
)
return FluxMetaData(data=metadata)

View File

@ -1,14 +1,34 @@
from pydantic import BaseModel
from pydantic import BaseModel, computed_field
from plesna.models.storage import Table
from plesna.models.transformation import Transformation
class Flux(BaseModel):
sources: dict[str, Table]
targets: dict[str, Table]
sources: list[Table]
targets: list[Table]
transformation: Transformation
@computed_field
@property
def sources_dict(self) -> dict[str, Table]:
return {s.id: s for s in self.sources}
@computed_field
@property
def sources_id(self) -> dict[str, Table]:
return [s.id for s in self.sources]
@computed_field
@property
def targets_id(self) -> dict[str, Table]:
return [s.id for s in self.targets]
@computed_field
@property
def targets_dict(self) -> dict[str, Table]:
return {s.id: s for s in self.targets}
class FluxMetaData(BaseModel):
data: dict

View File

@ -5,22 +5,14 @@ from plesna.models.transformation import Transformation
def test_consume_flux():
sources = {
"src1": Table(
id="src1", repo_id="test", schema_id="test", name="test", value="here", datas=["d"]
),
"src2": Table(
id="src2", repo_id="test", schema_id="test", name="test", value="here", datas=["d"]
),
}
targets = {
"tgt1": Table(
id="tgt1", repo_id="test", schema_id="test", name="test", value="this", datas=["d"]
),
"tgt2": Table(
id="tgt2", repo_id="test", schema_id="test", name="test", value="that", datas=["d"]
),
}
sources = [
Table(id="src1", repo_id="test", schema_id="test", name="test", value="here", datas=["d"]),
Table(id="src2", repo_id="test", schema_id="test", name="test", value="here", datas=["d"]),
]
targets = [
Table(id="tgt1", repo_id="test", schema_id="test", name="test", value="this", datas=["d"]),
Table(id="tgt2", repo_id="test", schema_id="test", name="test", value="that", datas=["d"]),
]
def func(sources, targets, **kwrds):
return {

View File

@ -41,12 +41,12 @@ def test_add_repository(
@pytest.fixture
def copy_flux(repository: FSRepository) -> Flux:
raw_username = {"username": repository.table("test-raw-username")}
bronze_username = {"username": repository.table("test-bronze-username")}
raw_username = [repository.table("test-raw-username")]
bronze_username = [repository.table("test-bronze-username")]
def copy(sources, targets):
src_path = Path(sources["username"].datas[0])
tgt_path = Path(targets["username"].datas[0])
src_path = Path(sources["test-raw-username"].datas[0])
tgt_path = Path(targets["test-bronze-username"].datas[0])
shutil.copy(src_path, tgt_path)
return {"src_size": src_path.stat().st_size, "tgt_size": tgt_path.stat().st_size}
@ -62,11 +62,11 @@ def copy_flux(repository: FSRepository) -> Flux:
@pytest.fixture
def foo_flux(repository: FSRepository) -> Flux:
src = {
"username": repository.table("test-raw-username"),
"recovery": repository.table("test-raw-recovery"),
}
targets = {"username_foo": repository.table("test-bronze-foo")}
src = [
repository.table("test-raw-username"),
repository.table("test-raw-recovery"),
]
targets = [repository.table("test-bronze-foo")]
def foo(sources, targets):
return {"who": "foo"}
@ -131,10 +131,10 @@ def test_content_from_graph(dataplatform: DataPlateform):
Node(name="test-raw-username", infos={}),
}
# assert dataplatform.graphset.node_sets == {
# Node(name="test-raw-username", infos={}),
# Node(name="test-bronze-username", infos={}),
# }
assert dataplatform.graphset.node_sets == {
Node(name="test-raw-username", infos={}),
Node(name="test-bronze-username", infos={}),
}
def test_execute_flux(dataplatform: DataPlateform):