feat: build dataplatform graph and graphset dynamicaly

This commit is contained in:
Bertrand Benjamin 2025-01-05 16:25:22 +01:00
parent f2ed76c8aa
commit 44a7eed5b4

View File

@ -12,31 +12,15 @@ class DataPlateformError(Exception):
class DataPlateform:
def __init__(self):
self._graphset = GraphSet()
self._graph = Graph()
self._metadata_engine = ""
self._fluxes = {}
self._repositories = {}
@property
def graphset(self) -> GraphSet:
return self._graphset
@property
def graph(self) -> Graph:
return self._graph
def repository_graph_feed(self, repository_id: str):
for schema in self._repositories[repository_id].schemas():
for table in self._repositories[repository_id].tables(schema):
self._graph.add_node(Node(name=table))
def add_repository(self, repository: Repository) -> str:
if repository.id in self._repositories:
raise DataPlateformError("The repository {repository.id} already exists")
self._repositories[repository.id] = repository
self.repository_graph_feed(repository.id)
return repository.id
@property
@ -51,14 +35,8 @@ class DataPlateform:
raise DataPlateformError("The flux {name} already exists")
self._fluxes[name] = flux
self.flux_graphset_feed(name)
return name
def flux_graphset_feed(self, flux_name: str):
flux = self.flux(flux_name)
edge = flux_to_edgeonset(flux)
self._graphset.append(edge)
@property
def fluxes(self) -> list[str]:
return list(self._fluxes)
@ -70,3 +48,21 @@ class DataPlateform:
if name not in self._fluxes:
raise DataPlateformError("The flux {name} is not registered")
return consume_flux(self._fluxes[name])
@property
def graphset(self) -> GraphSet:
graphset = GraphSet()
for flux in self._fluxes.values():
edge = flux_to_edgeonset(flux)
graphset.append(edge)
return graphset
@property
def graph(self) -> Graph:
graph = Graph()
for repo in self._repositories.values():
for schema in repo.schemas():
for table in repo.tables(schema):
graph.add_node(Node(name=table))
return graph