Feat: flux takes list of tables for sources and targets

This commit is contained in:
Bertrand Benjamin 2025-01-05 15:31:40 +01:00
parent 8a43a93cda
commit 09783f9c1e
4 changed files with 45 additions and 33 deletions

View File

@ -3,6 +3,6 @@ from plesna.models.flux import Flux, FluxMetaData
def consume_flux(flux: Flux) -> FluxMetaData: def consume_flux(flux: Flux) -> FluxMetaData:
metadata = flux.transformation.function( metadata = flux.transformation.function(
sources=flux.sources, targets=flux.targets, **flux.transformation.extra_kwrds sources=flux.sources_dict, targets=flux.targets_dict, **flux.transformation.extra_kwrds
) )
return FluxMetaData(data=metadata) return FluxMetaData(data=metadata)

View File

@ -1,14 +1,34 @@
from pydantic import BaseModel from pydantic import BaseModel, computed_field
from plesna.models.storage import Table from plesna.models.storage import Table
from plesna.models.transformation import Transformation from plesna.models.transformation import Transformation
class Flux(BaseModel): class Flux(BaseModel):
sources: dict[str, Table] sources: list[Table]
targets: dict[str, Table] targets: list[Table]
transformation: Transformation transformation: Transformation
@computed_field
@property
def sources_dict(self) -> dict[str, Table]:
return {s.id: s for s in self.sources}
@computed_field
@property
def sources_id(self) -> dict[str, Table]:
return [s.id for s in self.sources]
@computed_field
@property
def targets_id(self) -> dict[str, Table]:
return [s.id for s in self.targets]
@computed_field
@property
def targets_dict(self) -> dict[str, Table]:
return {s.id: s for s in self.targets}
class FluxMetaData(BaseModel): class FluxMetaData(BaseModel):
data: dict data: dict

View File

@ -5,22 +5,14 @@ from plesna.models.transformation import Transformation
def test_consume_flux(): def test_consume_flux():
sources = { sources = [
"src1": Table( Table(id="src1", repo_id="test", schema_id="test", name="test", value="here", datas=["d"]),
id="src1", repo_id="test", schema_id="test", name="test", value="here", datas=["d"] Table(id="src2", repo_id="test", schema_id="test", name="test", value="here", datas=["d"]),
), ]
"src2": Table( targets = [
id="src2", repo_id="test", schema_id="test", name="test", value="here", datas=["d"] Table(id="tgt1", repo_id="test", schema_id="test", name="test", value="this", datas=["d"]),
), Table(id="tgt2", repo_id="test", schema_id="test", name="test", value="that", datas=["d"]),
} ]
targets = {
"tgt1": Table(
id="tgt1", repo_id="test", schema_id="test", name="test", value="this", datas=["d"]
),
"tgt2": Table(
id="tgt2", repo_id="test", schema_id="test", name="test", value="that", datas=["d"]
),
}
def func(sources, targets, **kwrds): def func(sources, targets, **kwrds):
return { return {

View File

@ -41,12 +41,12 @@ def test_add_repository(
@pytest.fixture @pytest.fixture
def copy_flux(repository: FSRepository) -> Flux: def copy_flux(repository: FSRepository) -> Flux:
raw_username = {"username": repository.table("test-raw-username")} raw_username = [repository.table("test-raw-username")]
bronze_username = {"username": repository.table("test-bronze-username")} bronze_username = [repository.table("test-bronze-username")]
def copy(sources, targets): def copy(sources, targets):
src_path = Path(sources["username"].datas[0]) src_path = Path(sources["test-raw-username"].datas[0])
tgt_path = Path(targets["username"].datas[0]) tgt_path = Path(targets["test-bronze-username"].datas[0])
shutil.copy(src_path, tgt_path) shutil.copy(src_path, tgt_path)
return {"src_size": src_path.stat().st_size, "tgt_size": tgt_path.stat().st_size} return {"src_size": src_path.stat().st_size, "tgt_size": tgt_path.stat().st_size}
@ -62,11 +62,11 @@ def copy_flux(repository: FSRepository) -> Flux:
@pytest.fixture @pytest.fixture
def foo_flux(repository: FSRepository) -> Flux: def foo_flux(repository: FSRepository) -> Flux:
src = { src = [
"username": repository.table("test-raw-username"), repository.table("test-raw-username"),
"recovery": repository.table("test-raw-recovery"), repository.table("test-raw-recovery"),
} ]
targets = {"username_foo": repository.table("test-bronze-foo")} targets = [repository.table("test-bronze-foo")]
def foo(sources, targets): def foo(sources, targets):
return {"who": "foo"} return {"who": "foo"}
@ -131,10 +131,10 @@ def test_content_from_graph(dataplatform: DataPlateform):
Node(name="test-raw-username", infos={}), Node(name="test-raw-username", infos={}),
} }
# assert dataplatform.graphset.node_sets == { assert dataplatform.graphset.node_sets == {
# Node(name="test-raw-username", infos={}), Node(name="test-raw-username", infos={}),
# Node(name="test-bronze-username", infos={}), Node(name="test-bronze-username", infos={}),
# } }
def test_execute_flux(dataplatform: DataPlateform): def test_execute_flux(dataplatform: DataPlateform):