Feat: add repository to dataplatform
This commit is contained in:
parent
78d6ac12bf
commit
beb9fd5465
@ -1,5 +1,5 @@
|
|||||||
from plesna.datastore.datacatalogue import DataCatalogue
|
|
||||||
from plesna.graph.graph_set import GraphSet
|
from plesna.graph.graph_set import GraphSet
|
||||||
|
from plesna.storage.repository.repository import Repository
|
||||||
|
|
||||||
|
|
||||||
class DataPlateformError(Exception):
|
class DataPlateformError(Exception):
|
||||||
@ -11,17 +11,17 @@ class DataPlateform:
|
|||||||
self._graphset = GraphSet()
|
self._graphset = GraphSet()
|
||||||
self._metadata_engine = ""
|
self._metadata_engine = ""
|
||||||
self._transformations = {}
|
self._transformations = {}
|
||||||
self._datacatalogues = {}
|
self._repositories = {}
|
||||||
|
|
||||||
def add_datacatalague(self, name: str, datacatalogue: DataCatalogue):
|
def add_repository(self, name: str, repository: Repository):
|
||||||
if name in self._datacatalogues:
|
if name in self._repositories:
|
||||||
raise DataPlateformError("The datacatalogue {name} already exists")
|
raise DataPlateformError("The repository {name} already exists")
|
||||||
|
|
||||||
self._datacatalogues[name] = datacatalogue
|
self._repositories[name] = repository
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def datacatalogues(self):
|
def repositories(self) -> list[str]:
|
||||||
return list(self._datacatalogues)
|
return list(self._repositories)
|
||||||
|
|
||||||
def get_datacatalogue(self, name: str):
|
def repository(self, name: str) -> Repository:
|
||||||
return self._datacatalogues[name]
|
return self._repositories[name]
|
||||||
|
@ -1,43 +1,74 @@
|
|||||||
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from plesna.dataplatform import DataPlateform
|
from plesna.dataplatform import DataPlateform
|
||||||
from plesna.datastore.fs_datacatalogue import FSDataCatalogue
|
from plesna.storage.repository.fs_repository import FSRepository
|
||||||
|
|
||||||
FIXTURE_DIR = Path(__file__).parent / Path("raw_data")
|
FIXTURE_DIR = Path(__file__).parent.parent / Path("raw_datas")
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def raw_catalogue(tmp_path):
|
def repository(tmp_path) -> FSRepository:
|
||||||
raw_path = Path(tmp_path) / "raw"
|
raw_path = Path(tmp_path) / "raw"
|
||||||
raw_path.mkdir()
|
raw_path.mkdir()
|
||||||
return FSDataCatalogue("raw", raw_path)
|
|
||||||
|
|
||||||
|
example_src = FIXTURE_DIR
|
||||||
|
assert example_src.exists()
|
||||||
|
|
||||||
|
recovery_loc = raw_path / "recovery"
|
||||||
|
recovery_loc.mkdir()
|
||||||
|
username_loc = raw_path / "username"
|
||||||
|
username_loc.mkdir()
|
||||||
|
salary_loc = raw_path / "salary"
|
||||||
|
salary_loc.mkdir()
|
||||||
|
|
||||||
|
for f in example_src.glob("*"):
|
||||||
|
if "recovery" in str(f):
|
||||||
|
shutil.copy(f, recovery_loc)
|
||||||
|
if "salary" in str(f):
|
||||||
|
shutil.copy(f, salary_loc)
|
||||||
|
else:
|
||||||
|
shutil.copy(f, username_loc)
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def bronze_catalogue(tmp_path):
|
|
||||||
bronze_path = Path(tmp_path) / "bronze"
|
bronze_path = Path(tmp_path) / "bronze"
|
||||||
bronze_path.mkdir()
|
bronze_path.mkdir()
|
||||||
return FSDataCatalogue("bronze", bronze_path)
|
silver_path = Path(tmp_path) / "silver"
|
||||||
|
silver_path.mkdir()
|
||||||
|
|
||||||
|
return FSRepository("test", tmp_path, "test")
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_repository(
|
||||||
|
repository: FSRepository,
|
||||||
|
):
|
||||||
|
dp = DataPlateform()
|
||||||
|
dp.add_repository("test", repository)
|
||||||
|
|
||||||
|
assert dp.repositories == ["test"]
|
||||||
|
|
||||||
|
assert dp.repository("test") == repository
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def silver_catalogue(tmp_path):
|
def dataplatform(
|
||||||
silver_path = Path(tmp_path) / "silver"
|
repository: FSRepository,
|
||||||
silver_path.mkdir()
|
) -> DataPlateform:
|
||||||
return FSDataCatalogue("silver", silver_path)
|
|
||||||
|
|
||||||
|
|
||||||
def test_add_catalogue(
|
|
||||||
raw_catalogue: FSDataCatalogue,
|
|
||||||
bronze_catalogue: FSDataCatalogue,
|
|
||||||
silver_catalogue: FSDataCatalogue,
|
|
||||||
):
|
|
||||||
dp = DataPlateform()
|
dp = DataPlateform()
|
||||||
dp.add_datacatalague("raw", raw_catalogue)
|
dp.add_repository("test", repository)
|
||||||
dp.add_datacatalague("bronze", bronze_catalogue)
|
return dp
|
||||||
dp.add_datacatalague("silver", silver_catalogue)
|
|
||||||
|
|
||||||
assert dp.datacatalogues == ["raw", "bronze", "silver"]
|
|
||||||
assert dp.get_datacatalogue("raw") == raw_catalogue
|
def test_listing_content(dataplatform: DataPlateform):
|
||||||
|
assert dataplatform.repository("test").schemas() == ["raw", "bronze", "silver"]
|
||||||
|
assert dataplatform.repository("test").schema("raw").tables == [
|
||||||
|
"recovery",
|
||||||
|
"username",
|
||||||
|
"salary",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_flux(dataplatform: DataPlateform):
|
||||||
|
# dataplatform.add_flux()
|
||||||
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user