Feat: add repository to dataplatform

This commit is contained in:
Bertrand Benjamin 2025-01-03 16:01:01 +01:00
parent 78d6ac12bf
commit beb9fd5465
2 changed files with 64 additions and 33 deletions

View File

@ -1,5 +1,5 @@
from plesna.datastore.datacatalogue import DataCatalogue
from plesna.graph.graph_set import GraphSet
from plesna.storage.repository.repository import Repository
class DataPlateformError(Exception):
@ -11,17 +11,17 @@ class DataPlateform:
self._graphset = GraphSet()
self._metadata_engine = ""
self._transformations = {}
self._datacatalogues = {}
self._repositories = {}
def add_datacatalague(self, name: str, datacatalogue: DataCatalogue):
    """Register ``datacatalogue`` on the platform under ``name``.

    Args:
        name: Key under which the datacatalogue is stored.
        datacatalogue: The datacatalogue to register.

    Raises:
        DataPlateformError: If ``name`` is already registered.
    """
    if name in self._datacatalogues:
        # f-string: without the prefix the message contained the literal
        # text "{name}" instead of the offending name.
        raise DataPlateformError(f"The datacatalogue {name} already exists")
    self._datacatalogues[name] = datacatalogue

def add_repository(self, name: str, repository: Repository):
    """Register ``repository`` on the platform under ``name``.

    Args:
        name: Key under which the repository is stored.
        repository: The repository to register.

    Raises:
        DataPlateformError: If ``name`` is already registered.
    """
    if name in self._repositories:
        # f-string: without the prefix the message contained the literal
        # text "{name}" instead of the offending name.
        raise DataPlateformError(f"The repository {name} already exists")
    self._repositories[name] = repository
@property
def datacatalogues(self) -> list[str]:
    """Names of the registered datacatalogues, in registration order."""
    return list(self._datacatalogues)

# Each accessor needs its own decorator: callers read ``dp.repositories``
# as an attribute, not as a method call.
@property
def repositories(self) -> list[str]:
    """Names of the registered repositories, in registration order."""
    return list(self._repositories)
def get_datacatalogue(self, name: str) -> DataCatalogue:
    """Return the datacatalogue registered under ``name``.

    Raises:
        KeyError: If no datacatalogue was registered under ``name``.
    """
    return self._datacatalogues[name]
def repository(self, name: str) -> Repository:
    """Return the repository registered under ``name``.

    Raises:
        KeyError: If no repository was registered under ``name``.
    """
    registered = self._repositories
    return registered[name]

View File

@ -1,43 +1,74 @@
import shutil
from pathlib import Path
import pytest
from plesna.dataplatform import DataPlateform
from plesna.datastore.fs_datacatalogue import FSDataCatalogue
from plesna.storage.repository.fs_repository import FSRepository
# First binding: "raw_data" next to this file. It is immediately superseded
# by the assignment below, which is the value the rest of the module sees.
FIXTURE_DIR = Path(__file__).parent / "raw_data"
# Effective value: "raw_datas" one directory above this file's directory.
FIXTURE_DIR = Path(__file__).parent.parent / "raw_datas"
@pytest.fixture
def raw_catalogue(tmp_path):
    """Provide an ``FSDataCatalogue`` named "raw" backed by ``tmp_path / "raw"``."""
    raw_path = Path(tmp_path) / "raw"
    raw_path.mkdir()
    return FSDataCatalogue("raw", raw_path)


@pytest.fixture
def repository(tmp_path) -> FSRepository:
    """Provide an ``FSRepository`` over a populated temporary directory tree.

    Layout created under ``tmp_path``:

    * ``raw/recovery``, ``raw/username``, ``raw/salary`` - filled with the
      entries found directly inside ``FIXTURE_DIR``;
    * ``bronze`` and ``silver`` - left empty.
    """
    raw_path = Path(tmp_path) / "raw"
    raw_path.mkdir()

    example_src = FIXTURE_DIR
    assert example_src.exists()

    recovery_loc = raw_path / "recovery"
    recovery_loc.mkdir()
    username_loc = raw_path / "username"
    username_loc.mkdir()
    salary_loc = raw_path / "salary"
    salary_loc.mkdir()

    for f in example_src.glob("*"):
        # Route each entry to exactly one folder. Two details matter:
        # - ``elif`` keeps "recovery" entries out of the ``else`` branch, so
        #   they are no longer copied into ``username`` as well;
        # - matching on ``f.name`` rather than the full path means a parent
        #   directory called e.g. "salary" cannot misroute every entry.
        if "recovery" in f.name:
            shutil.copy(f, recovery_loc)
        elif "salary" in f.name:
            shutil.copy(f, salary_loc)
        else:
            shutil.copy(f, username_loc)

    bronze_path = Path(tmp_path) / "bronze"
    bronze_path.mkdir()
    silver_path = Path(tmp_path) / "silver"
    silver_path.mkdir()

    return FSRepository("test", tmp_path, "test")


@pytest.fixture
def bronze_catalogue(tmp_path):
    """Provide an ``FSDataCatalogue`` named "bronze" backed by ``tmp_path / "bronze"``."""
    bronze_path = Path(tmp_path) / "bronze"
    bronze_path.mkdir()
    return FSDataCatalogue("bronze", bronze_path)
def test_add_repository(repository: FSRepository):
    """A registered repository is listed by name and can be fetched back."""
    platform = DataPlateform()
    platform.add_repository("test", repository)

    assert platform.repositories == ["test"]
    assert platform.repository("test") == repository
@pytest.fixture
def silver_catalogue(tmp_path):
    """Provide an ``FSDataCatalogue`` named "silver" backed by ``tmp_path / "silver"``."""
    silver_path = Path(tmp_path) / "silver"
    silver_path.mkdir()
    return FSDataCatalogue("silver", silver_path)


def test_add_catalogue(
    raw_catalogue: FSDataCatalogue,
    bronze_catalogue: FSDataCatalogue,
    silver_catalogue: FSDataCatalogue,
):
    """Registered datacatalogues are listed in order and can be fetched back."""
    dp = DataPlateform()
    dp.add_datacatalague("raw", raw_catalogue)
    dp.add_datacatalague("bronze", bronze_catalogue)
    dp.add_datacatalague("silver", silver_catalogue)

    assert dp.datacatalogues == ["raw", "bronze", "silver"]
    assert dp.get_datacatalogue("raw") == raw_catalogue


# Must be a fixture: the tests below request ``dataplatform`` as an argument.
@pytest.fixture
def dataplatform(
    repository: FSRepository,
) -> DataPlateform:
    """Provide a ``DataPlateform`` with ``repository`` registered as "test"."""
    dp = DataPlateform()
    dp.add_repository("test", repository)
    return dp
def test_listing_content(dataplatform: DataPlateform):
    """The "test" repository lists its schemas and the tables of "raw"."""
    expected_schemas = ["raw", "bronze", "silver"]
    expected_raw_tables = [
        "recovery",
        "username",
        "salary",
    ]

    assert dataplatform.repository("test").schemas() == expected_schemas
    assert dataplatform.repository("test").schema("raw").tables == expected_raw_tables
def test_add_flux(dataplatform: DataPlateform):
    """Placeholder: requests the ``dataplatform`` fixture but asserts nothing yet."""
    # dataplatform.add_flux()