From 13f80d85534b65b3c75e4c5c79ef387329b7f3ab Mon Sep 17 00:00:00 2001 From: Bertrand Benjamin Date: Sat, 27 Jul 2024 17:39:09 +0200 Subject: [PATCH] feat: add schema and table listing --- dashboard/app.py | 3 ++ dashboard/datalake.py | 13 +++++++++ dashboard/libs/__init__.py | 0 dashboard/libs/fs_schema.py | 35 +++++++++++++++++++++++ dashboard/libs/stage/fs_stage.py | 48 ++++++++++++++++++++++++++++++++ dashboard/libs/stage/metadata.py | 5 ++++ dashboard/libs/stage/stage.py | 36 ++++++++++++++++++++++++ dashboard/pages/__init__.py | 0 dashboard/pages/config.py | 3 -- dashboard/pages/home.py | 48 ++++++++++++++++++++++++++++++-- 10 files changed, 185 insertions(+), 6 deletions(-) create mode 100644 dashboard/datalake.py create mode 100644 dashboard/libs/__init__.py create mode 100644 dashboard/libs/fs_schema.py create mode 100644 dashboard/libs/stage/fs_stage.py create mode 100644 dashboard/libs/stage/metadata.py create mode 100644 dashboard/libs/stage/stage.py create mode 100644 dashboard/pages/__init__.py diff --git a/dashboard/app.py b/dashboard/app.py index 7af5c57..54fcc52 100644 --- a/dashboard/app.py +++ b/dashboard/app.py @@ -1,7 +1,10 @@ import dash from dash import Dash, html, dcc +from .pages import home, config app = Dash(__name__, use_pages=True) +dash.register_page(home.__name__, path='/', layout=home.layout) +dash.register_page(config.__name__, path='/config', layout=config.layout) app.layout = html.Div([ html.H1('Plesna'), diff --git a/dashboard/datalake.py b/dashboard/datalake.py new file mode 100644 index 0000000..9fff53f --- /dev/null +++ b/dashboard/datalake.py @@ -0,0 +1,13 @@ +from .libs.stage.fs_stage import FSStage +from dotenv import dotenv_values + +env = { + **dotenv_values(".env"), +} + +stages = { + "raw": FSStage(f"{env['DATA_PATH']}/{env['RAW_SUBPATH']}"), + "staging": FSStage(f"{env['DATA_PATH']}/{env['STAGING_SUBPATH']}"), + "gold": FSStage(f"{env['DATA_PATH']}/{env['GOLD_SUBPATH']}"), + "mart": 
FSStage(f"{env['DATA_PATH']}/{env['MART_SUBPATH']}"), +} diff --git a/dashboard/libs/__init__.py b/dashboard/libs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dashboard/libs/fs_schema.py b/dashboard/libs/fs_schema.py new file mode 100644 index 0000000..4836858 --- /dev/null +++ b/dashboard/libs/fs_schema.py @@ -0,0 +1,35 @@ +from .schema import AbstractSchema # NOTE(review): dashboard/libs/schema.py is not added by this patch - confirm it exists +from pathlib import Path + +class FSSchema(AbstractSchema): + def __init__(self, basepath, metadata_engine=None): + self.basepath = basepath + self._metadata_engine = metadata_engine + + def ls(self, dir, only_files=True): + dirpath = Path(dir) + if only_files: # keep non-directory entries only, same filter as FSStage.ls + return [f for f in dirpath.iterdir() if not f.is_dir()] + return [f for f in dirpath.iterdir()] + + def tables(self, dir, only_files=True): + dirpath = Path(dir) + if only_files: # keep non-directory entries only, same filter as FSStage.ls + return [f for f in dirpath.iterdir() if not f.is_dir()] + return [f for f in dirpath.iterdir()] + + def info(self, path): + path = Path(path) + pass + + def read(self, path): + path = Path(path) + pass + + def write(self, path, content): + path = Path(path) + pass + + def delete(self, path): + path = Path(path) + pass diff --git a/dashboard/libs/stage/fs_stage.py b/dashboard/libs/stage/fs_stage.py new file mode 100644 index 0000000..6c89031 --- /dev/null +++ b/dashboard/libs/stage/fs_stage.py @@ -0,0 +1,48 @@ +from .stage import AbstractStage +from pathlib import Path + +class FSStage(AbstractStage): + def __init__(self, basepath, metadata_engine=None): + self.basepath = Path(basepath) + self._metadata_engine = metadata_engine + + def ls(self, dir, only_files=False, only_directories=False) -> list[str]: + dirpath = Path(dir) + + if only_files: + return [str(f.relative_to(dirpath)) for f in dirpath.iterdir() if not f.is_dir()] + + if only_directories: + return [str(f.relative_to(dirpath)) for f in dirpath.iterdir() if f.is_dir()] + + return [str(f.relative_to(dirpath)) for f in dirpath.iterdir()] + + def schemas(self) -> list[str]: + dirpath = self.basepath 
+ return self.ls(dirpath, only_directories=True) + + def tables(self, schema:str) -> list[str]: + dirpath = self.basepath / schema + return self.ls(dirpath, only_files=True) + + def build_table_path(self, table:str, schema:str): + table_path = self.basepath + if schema == '': + return table_path / table + return table_path / schema / table + + def info(self, table:str, schema:str=''): + table_path = self.build_table_path(table, schema) + pass + + def read(self, table:str, schema:str=''): + table_path = self.build_table_path(table, schema) + pass + + def write(self, table:str, content, schema:str=''): + table_path = self.build_table_path(table, schema) + pass + + def delete(self, table:str, schema:str=''): + table_path = self.build_table_path(table, schema) + pass diff --git a/dashboard/libs/stage/metadata.py b/dashboard/libs/stage/metadata.py new file mode 100644 index 0000000..7a2fe91 --- /dev/null +++ b/dashboard/libs/stage/metadata.py @@ -0,0 +1,5 @@ +from abc import ABC + + +class AbstractMetadataEngine(ABC): + pass diff --git a/dashboard/libs/stage/stage.py b/dashboard/libs/stage/stage.py new file mode 100644 index 0000000..993c18d --- /dev/null +++ b/dashboard/libs/stage/stage.py @@ -0,0 +1,36 @@ +import abc +from .metadata import AbstractMetadataEngine + + +class AbstractStage(abc.ABC): + metadata_engine = AbstractMetadataEngine + + @abc.abstractmethod + def schemas(self): + """ List the schemas of the stage """ + raise NotImplementedError + + @abc.abstractmethod + def tables(self, schema): + """ List the tables of a schema """ + raise NotImplementedError + + @abc.abstractmethod + def info(self, path): + """ Get information about a file """ + raise NotImplementedError + + @abc.abstractmethod + def read(self, path): + """ Get the content of a file """ + raise NotImplementedError + + @abc.abstractmethod + def write(self, path, content): + """ Write content into the file """ + raise NotImplementedError + + @abc.abstractmethod + def delete(self, path): + """ Delete the file """ + raise NotImplementedError 
diff --git a/dashboard/pages/__init__.py b/dashboard/pages/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dashboard/pages/config.py b/dashboard/pages/config.py index 47a3bf7..4c16e64 100644 --- a/dashboard/pages/config.py +++ b/dashboard/pages/config.py @@ -1,4 +1,3 @@ -import dash from dash import html from dotenv import dotenv_values import os @@ -9,8 +8,6 @@ env = { } -dash.register_page(__name__, path='/config') - layout = html.Div([ html.H1('This is our Config page'), html.Ul(children = [html.Li(f"{k} = {v}") for k,v in env.items()]), diff --git a/dashboard/pages/home.py b/dashboard/pages/home.py index 69f06c4..108416a 100644 --- a/dashboard/pages/home.py +++ b/dashboard/pages/home.py @@ -1,9 +1,51 @@ -import dash from dash import html +from ..datalake import stages +from ..libs.stage.stage import AbstractStage + + +def html_list_schema(stage:AbstractStage, with_tables=True): + """ Build an html list of the schemas in stage; when with_tables is set, each schema item also nests the list of its tables """ + if with_tables: + return html.Ul( + [ + html.Li( + children = [ + html.Span(schema), + html_list_table(stage, schema) + ] + ) for schema in stage.schemas() + ] + ) + return html.Ul( + [ + html.Li(schema) for schema in stage.schemas() + ] + ) + + + +def html_list_table(stage:AbstractStage, schema:str): + """ Build an html list of the tables that stage reports for schema """ + return html.Ul( + [ + html.Li(table) for table in stage.tables(schema=schema) + ] + ) + -dash.register_page(__name__, path='/') layout = html.Div([ html.H1('This is our Home page'), - html.Div('This is our Home page content.'), + html.Div(children=[ + html.Ul( + children=[ + html.Li( + children=[ + html.Span(stagename), + html_list_schema(stage) + ] + ) for stagename, stage in stages.items() + ] + ) + ]), ])