plesna/dashboard/libs/repository/fs_repository.py

70 lines
2.1 KiB
Python
Raw Normal View History

2024-07-27 15:39:09 +00:00
from pathlib import Path
2024-08-07 09:39:33 +00:00
2024-07-28 15:34:56 +00:00
import pandas as pd
2024-07-27 15:39:09 +00:00
2024-08-07 09:39:33 +00:00
from .repository import AbstractRepository
class FSRepository(AbstractRepository):
    """Repository backed by a directory tree on the local filesystem.

    Directories below ``basepath`` are listed as schemas and the files they
    contain are listed as tables. The root directory itself is addressed
    with the schema name ``"."``.
    """

    def __init__(self, name, basepath, metadata_engine=None):
        """Bind the repository to an existing directory.

        Args:
            name: Label stored on the instance as ``self.name``.
            basepath: Root directory of the repository (anything accepted
                by ``pathlib.Path``).
            metadata_engine: Optional object stored as
                ``self._metadata_engine``; it is not used by any method of
                this class.

        Raises:
            AssertionError: If ``basepath`` does not exist.
        """
        self.name = name
        self.basepath = Path(basepath)
        # Raised explicitly (instead of through an ``assert`` statement) so
        # the check still runs under ``python -O``; the exception type is
        # kept so existing callers are unaffected.
        if not self.basepath.exists():
            raise AssertionError(f"basepath does not exist: {self.basepath}")
        self._metadata_engine = metadata_engine

    def ls(
        self, dir="", only_files=False, only_directories=False, recursive=False
    ) -> list[str]:
        """List the entries of ``dir`` as paths relative to ``dir``.

        Args:
            dir: Directory to list, relative to ``basepath``.
            only_files: Keep only entries that are not directories. Takes
                precedence over ``only_directories`` when both are set.
            only_directories: Keep only directories.
            recursive: Walk the whole tree below ``dir``. Only honoured
                together with ``only_directories``; the result then also
                contains ``"."`` for ``dir`` itself.

        Returns:
            The matching entries, as strings.
        """
        dirpath = self.basepath / dir

        if only_files:
            return [
                str(entry.relative_to(dirpath))
                for entry in dirpath.iterdir()
                if not entry.is_dir()
            ]

        if only_directories:
            if recursive:
                # Path.walk() yields (dirpath, dirnames, filenames); only
                # the visited directory is of interest here.
                return [
                    str(root.relative_to(dirpath))
                    for root, _dirnames, _filenames in dirpath.walk()
                ]
            return [
                str(entry.relative_to(dirpath))
                for entry in dirpath.iterdir()
                if entry.is_dir()
            ]

        return [str(entry.relative_to(dirpath)) for entry in dirpath.iterdir()]

    def schemas(self, recursive=True) -> list[str]:
        """List the schemas (directories) of the repository.

        Args:
            recursive: When true (the default), walk the whole tree; the
                result then includes ``"."`` for the root. When false, only
                the direct sub-directories of ``basepath`` are returned.

        Returns:
            Directory paths relative to ``basepath``, as strings.
        """
        # Forward the caller's choice instead of hard-coding ``True``.
        return self.ls("", only_directories=True, recursive=recursive)

    def tables(self, schema: str = ".") -> list[str]:
        """List the tables (files) stored directly inside ``schema``."""
        return self.ls(schema, only_files=True)

    def build_table_path(self, table: str, schema: str):
        """Return the filesystem path of ``table`` inside ``schema``.

        The schema ``"."`` designates the repository root.
        """
        table_path = self.basepath
        if schema == ".":
            return table_path / table
        return table_path / schema / table

    def infos(self, table: str, schema: str = "."):
        """Placeholder: not implemented yet, currently returns ``None``."""
        # TODO: implement table information lookup.
        pass

    def read(self, table: str, schema: str = ".", read_options: dict | None = None):
        """Load a table with pandas, choosing the reader from the suffix.

        Args:
            table: File name of the table, including its extension.
            schema: Schema (directory) holding the table.
            read_options: Extra keyword arguments forwarded to the pandas
                reader. ``None`` (the default) means no extra arguments.

        Returns:
            The result of ``pandas.read_csv`` for ``.csv`` files or
            ``pandas.read_excel`` for ``.xlsx`` files.

        Raises:
            ValueError: If the file extension is neither ``.csv`` nor
                ``.xlsx``.
        """
        # A ``None`` default avoids sharing one mutable dict across calls.
        options = {} if read_options is None else read_options
        table_path = self.build_table_path(table, schema)
        extension = table_path.suffix

        if extension == ".csv":
            return pd.read_csv(table_path, **options)
        if extension == ".xlsx":
            return pd.read_excel(table_path, **options)

        raise ValueError("Can't open the table")

    def write(self, content, table: str, schema: str = "."):
        """Placeholder: not implemented yet; nothing is written."""
        # TODO: implement writing ``content`` to the table file.
        pass

    def delete_table(self, table: str, schema: str = "."):
        """Placeholder: not implemented yet; nothing is deleted."""
        # TODO: implement table deletion.
        pass