2024-07-27 15:39:09 +00:00
|
|
|
from pathlib import Path
|
2024-08-07 09:39:33 +00:00
|
|
|
|
2024-07-28 15:34:56 +00:00
|
|
|
import pandas as pd
|
2024-07-27 15:39:09 +00:00
|
|
|
|
2024-08-07 09:39:33 +00:00
|
|
|
from .repository import AbstractRepository
|
|
|
|
|
2024-10-07 03:27:46 +00:00
|
|
|
# Maps each supported table format to the file suffixes that select it.
# Suffixes carry their leading dot because they are compared against
# ``Path.suffix``.  (The misspelled name is kept on purpose: other code
# refers to this constant by it.)
ACCEPTABLE_EXTENTIONS: dict[str, list[str]] = {
    "csv": [".csv"],
    "excel": [".xls", ".xlsx"],
}
|
2024-08-07 09:39:33 +00:00
|
|
|
|
|
|
|
class FSRepository(AbstractRepository):
    """Repository backed by a directory tree on the local file system.

    Directories under ``basepath`` are exposed as schemas (``"."`` stands
    for ``basepath`` itself) and the files inside them as tables.  Entries
    whose name starts with a dot are treated as hidden and never listed.
    """

    def __init__(self, name, basepath, metadata_engine=None):
        """Bind the repository to an existing path.

        Args:
            name: Name stored on the instance.
            basepath: Root path of the repository; it must already exist.
            metadata_engine: Optional object kept in ``_metadata_engine``;
                no method of this class uses it.

        Raises:
            FileNotFoundError: If ``basepath`` does not exist.
        """
        self.name = name
        self.basepath = Path(basepath)
        # Explicit raise instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would silently accept a missing path.
        if not self.basepath.exists():
            raise FileNotFoundError(
                f"Repository base path does not exist: {self.basepath}"
            )
        self._metadata_engine = metadata_engine

    @staticmethod
    def _is_hidden(entry: Path) -> bool:
        """Return True when the entry's own name starts with a dot."""
        # Test the final component only: the full path may legitimately
        # begin with "." or ".." (relative base path) without being hidden.
        return entry.name.startswith(".")

    def ls(
        self, dir="", only_files=False, only_directories=False, recursive=False
    ) -> list[str]:
        """List the visible entries of a directory of the repository.

        Args:
            dir: Directory to list, relative to ``basepath``.
            only_files: Return only non-directory entries.  Takes precedence
                over ``only_directories``.
            only_directories: Return only directories.
            recursive: Only honoured together with ``only_directories``:
                walk the whole tree instead of the first level.  The listed
                directory itself is then included as ``"."``.

        Returns:
            Entry paths relative to the listed directory, as strings.
        """
        dirpath = self.basepath / dir

        if only_directories and recursive and not only_files:
            found = []
            for current, subdirs, _files in dirpath.walk():
                # Prune hidden directories in place so that walk() neither
                # reports them nor descends into them.
                subdirs[:] = [d for d in subdirs if not d.startswith(".")]
                found.append(str(current.relative_to(dirpath)))
            return found

        visible = [
            entry for entry in dirpath.iterdir() if not self._is_hidden(entry)
        ]
        if only_files:
            return [entry.name for entry in visible if not entry.is_dir()]
        if only_directories:
            return [entry.name for entry in visible if entry.is_dir()]
        return [entry.name for entry in visible]

    def schemas(self, recursive=True) -> list[str]:
        """List the schemas (directories) of the repository.

        Args:
            recursive: When true (default) walk the whole tree, which also
                yields ``"."`` for the base path; when false list only the
                first-level directories.

        Returns:
            Directory paths relative to ``basepath``.
        """
        return self.ls("", only_directories=True, recursive=recursive)

    def tables(self, schema: str = ".") -> list[str]:
        """List the visible files stored directly inside ``schema``."""
        return self.ls(schema, only_files=True)

    def build_table_path(self, table: str, schema: str):
        """Return the path of ``table`` inside ``schema``.

        ``schema == "."`` designates ``basepath`` itself.
        """
        if schema == ".":
            return self.basepath / table
        return self.basepath / schema / table

    def infos(self, table: str, schema: str = "."):
        """Placeholder: not implemented yet, currently returns ``None``."""
        # TODO: implement; only the target path is resolved for now.
        table_path = self.build_table_path(table, schema)
        pass

    def read(self, table: str, schema: str = ".", **read_options):
        """Load a table with pandas.

        Args:
            table: File name of the table, extension included.
            schema: Directory holding the table (``"."`` for ``basepath``).
            **read_options: Forwarded to ``pandas.read_csv`` or
                ``pandas.read_excel``.

        Returns:
            Whatever the pandas reader returns for the file.

        Raises:
            FileNotFoundError: If the table file does not exist.
            ValueError: If the file extension is not a supported one.
        """
        table_path = self.build_table_path(table, schema)
        # Explicit raise instead of ``assert`` (stripped under -O).
        if not table_path.exists():
            raise FileNotFoundError(f"Table file does not exist: {table_path}")

        # Compare case-insensitively so "DATA.CSV" is handled like "data.csv".
        extension = table_path.suffix.lower()

        if extension in ACCEPTABLE_EXTENTIONS["csv"]:
            return pd.read_csv(table_path, **read_options)

        if extension in ACCEPTABLE_EXTENTIONS["excel"]:
            # openpyxl cannot open legacy ".xls" workbooks, so it is only the
            # default for ".xlsx"; for ".xls" pandas selects the engine.
            # setdefault also lets callers pass their own ``engine`` without
            # triggering a duplicate-keyword TypeError.
            if extension == ".xlsx":
                read_options.setdefault("engine", "openpyxl")
            return pd.read_excel(table_path, **read_options)

        raise ValueError("Bad extention. Can't open the table.")

    def write(self, content, table: str, schema: str = "."):
        """Placeholder: not implemented yet, nothing is written."""
        # TODO: implement; only the target path is resolved for now.
        table_path = self.build_table_path(table, schema)
        pass

    def delete_table(self, table: str, schema: str = "."):
        """Placeholder: not implemented yet, nothing is deleted."""
        # TODO: implement; only the target path is resolved for now.
        table_path = self.build_table_path(table, schema)
        pass
|