Compare commits

..

No commits in common. "959b53e6a08d70de0bf9d65561ed2d739c619cb4" and "2de0e5ef5caa91f3b068ec9a140b98a3e0f3e0cb" have entirely different histories.

8 changed files with 48 additions and 172 deletions

View File

@ -1,70 +0,0 @@
from collections.abc import Callable
from datetime import datetime
import pandas as pd
from pydantic import BaseModel
from ..repository.repository import AbstractRepository
class Schema(BaseModel):
    """Reference to a schema, qualified by the repository that holds it."""

    # Name of the repository; presumably a key of the ``Repositories``
    # mapping -- verify against callers.
    repository: str
    # Name of the schema inside that repository.
    # NOTE(review): ``schema`` is also an attribute of pydantic's
    # ``BaseModel``; confirm the installed pydantic version accepts a field
    # with this name without error or warning.
    schema: str
class Table(BaseModel):
    """Reference to a table: repository name, schema name and table name."""

    # Key used to look the repository up in a ``Repositories`` mapping.
    repository: str
    # Schema name, forwarded as-is to the repository's ``read`` / ``write``.
    # NOTE(review): ``schema`` is also an attribute of pydantic's
    # ``BaseModel``; confirm the installed pydantic version accepts a field
    # with this name without error or warning.
    schema: str
    # Table name, forwarded as-is to the repository's ``read`` / ``write``.
    table: str
class Flux(BaseModel):
    """Description of a data flow: inputs, a transformation and outputs."""

    # Tables to read; their DataFrames are handed to ``transformation`` as a
    # list in this same order.
    sources: list[Table]
    # Output tables keyed by name; each key returned by ``transformation``
    # is looked up here to find where its DataFrame must be written.
    destinations: dict[str, Table]
    # Callable receiving the list of source DataFrames and returning a
    # mapping ``destination name -> DataFrame``.
    transformation: Callable[[list[pd.DataFrame]], dict[str, pd.DataFrame]]
class State(BaseModel):
    """Report produced after a flux has been consumed."""

    # Per-destination value returned by the repository's ``write``.
    statuses: dict[str, str]
    # Output quantity counter (``consume_flux`` currently always sets it to 0).
    qty_out: int
    # Lines that failed (``consume_flux`` currently always sets it to []).
    failed_lines: list[str]
    # Timestamp taken before the sources are read.
    start: datetime
    # Timestamp taken after the last destination has been written.
    end: datetime
# Mapping of repository name -> repository instance; the names are the values
# that ``Table.repository`` is looked up against.
Repositories = dict[str, AbstractRepository]
def open_source(repositories: Repositories, source: Table) -> pd.DataFrame:
    """Read the table designated by ``source`` from its repository.

    Args:
        repositories: Available repositories, keyed by name.
        source: Table reference. ``source.repository`` selects the repository;
            ``source.table`` and ``source.schema`` are passed positionally,
            in that order, to its ``read`` method.

    Returns:
        Whatever the selected repository's ``read`` returns.

    Raises:
        KeyError: If ``source.repository`` is not a key of ``repositories``.
    """
    repository = repositories[source.repository]
    return repository.read(source.table, source.schema)
def write_source(
    content: pd.DataFrame, repositories: Repositories, destination: Table
) -> str:
    """Write ``content`` to the table designated by ``destination``.

    Args:
        content: Data to store.
        repositories: Available repositories, keyed by name.
        destination: Table reference. ``destination.repository`` selects the
            repository that receives the data.

    Returns:
        Whatever the selected repository's ``write`` returns.

    Raises:
        KeyError: If ``destination.repository`` is not a key of
            ``repositories``.
    """
    repository = repositories[destination.repository]
    # Arguments are positional: the repository must accept them in the order
    # (content, table, schema).
    return repository.write(content, destination.table, destination.schema)
def consume_flux(flux: Flux, repositories: dict[str, AbstractRepository]) -> State:
    """Run a flux end to end and report what happened.

    All sources are read first, then the transformation is applied once to
    the list of source DataFrames, and finally every DataFrame it returns is
    written to the destination registered under the same key.

    Args:
        flux: Sources, destinations and transformation to run.
        repositories: Available repositories, keyed by name.

    Returns:
        A ``State`` holding the per-destination write results and the start
        and end timestamps. ``qty_out`` and ``failed_lines`` are not computed
        yet and are always ``0`` and ``[]``.

    Raises:
        KeyError: If a source or destination names an unknown repository, or
            if the transformation returns a key missing from
            ``flux.destinations``.
    """
    started_at = datetime.now()

    inputs = []
    for source in flux.sources:
        inputs.append(open_source(repositories, source))

    outputs = flux.transformation(inputs)

    write_results = {}
    for name, frame in outputs.items():
        target = flux.destinations[name]
        write_results[name] = write_source(frame, repositories, target)

    finished_at = datetime.now()

    return State(
        statuses=write_results,
        qty_out=0,
        failed_lines=[],
        start=started_at,
        end=finished_at,
    )

View File

@ -0,0 +1,35 @@
from .schema import AbstractSchema
from pathlib import Path
class FSSchema(AbstractSchema):
    """Schema backed by a directory of the local filesystem.

    Only the listing methods (``ls`` and ``tables``) do real work. ``info``,
    ``read``, ``write`` and ``delete`` are placeholders: they normalise their
    argument to a ``Path`` and return ``None``.
    """

    def __init__(self, basepath, metadata_engine=None):
        """Store the base path and the optional metadata engine.

        Args:
            basepath: Location associated with this schema. It is only
                stored; no method of this class resolves paths against it.
            metadata_engine: Optional engine kept in ``_metadata_engine``;
                not used by the methods of this class.
        """
        self.basepath = basepath
        self._metadata_engine = metadata_engine

    @staticmethod
    def _list_entries(directory, only_files):
        """Return the entries of ``directory``, optionally keeping files only."""
        entries = Path(directory).iterdir()
        if only_files:
            # The original filtered on ``is_dir()``, which returned the
            # directories -- the opposite of what ``only_files`` asks for.
            return [entry for entry in entries if entry.is_file()]
        return list(entries)

    def ls(self, dir, only_files=True):
        """List the content of a directory.

        Args:
            dir: Directory to list. It is used as given (not joined with
                ``basepath``).
            only_files: When true (default) keep regular files only;
                otherwise return every entry.

        Returns:
            A list of ``Path`` objects, in the order yielded by
            ``Path.iterdir`` (arbitrary).

        Raises:
            OSError: Propagated from ``Path.iterdir``, e.g. when ``dir`` does
                not exist or is not a directory.
        """
        return self._list_entries(dir, only_files)

    def tables(self, dir, only_files=True):
        """List the tables stored in a directory.

        Behaves exactly like ``ls``: same arguments, same return value and
        same exceptions.
        """
        return self._list_entries(dir, only_files)

    def info(self, path):
        """Placeholder: not implemented yet, always returns ``None``."""
        path = Path(path)

    def read(self, path):
        """Placeholder: not implemented yet, always returns ``None``."""
        path = Path(path)

    def write(self, path, content):
        """Placeholder: not implemented yet, always returns ``None``."""
        path = Path(path)

    def delete(self, path):
        """Placeholder: not implemented yet, always returns ``None``."""
        path = Path(path)

View File

@ -55,7 +55,7 @@ class FSRepository(AbstractRepository):
raise ValueError("Can't open the table") raise ValueError("Can't open the table")
def write(self, content, table:str, schema:str='.'): def write(self, table:str, content, schema:str='.'):
table_path = self.build_table_path(table, schema) table_path = self.build_table_path(table, schema)
pass pass

View File

@ -1,5 +1,4 @@
import abc import abc
from .metadata import AbstractMetadataEngine from .metadata import AbstractMetadataEngine
@ -7,31 +6,31 @@ class AbstractRepository(abc.ABC):
metadata_engine = AbstractMetadataEngine metadata_engine = AbstractMetadataEngine
@abc.abstractmethod @abc.abstractmethod
def schemas(self) -> list[str]: def schemas():
"""List schemas""" """ List schemas """
raise NotImplementedError raise NotImplementedError
@abc.abstractmethod @abc.abstractmethod
def tables(self, schema) -> list[str]: def tables(schema):
"""List table in schema""" """ List table in schema"""
raise NotImplementedError raise NotImplementedError
@abc.abstractmethod @abc.abstractmethod
def infos(self, table: str, schema: str) -> dict[str, str]: def info(self, path):
"""Get infos about the table""" """ Get infos about a file"""
raise NotImplementedError raise NotImplementedError
@abc.abstractmethod @abc.abstractmethod
def read(self, table: str, schema: str): def read(self, path):
"""Get content of the table""" """ Get content of a file"""
raise NotImplementedError raise NotImplementedError
@abc.abstractmethod @abc.abstractmethod
def write(self, content, table: str, schema: str): def write(self, path, content):
"""Write content into the table""" """ Write content into the file"""
raise NotImplementedError raise NotImplementedError
@abc.abstractmethod @abc.abstractmethod
def delete_table(self, table: str, schema: str): def delete(self, path):
"""Delete the table""" """ Delete the file """
raise NotImplementedError raise NotImplementedError

View File

View File

@ -1,88 +0,0 @@
import pandas as pd
import pytest
from dashboard.libs.repository.repository import AbstractRepository
# A fake table is simply a DataFrame.
FakeTable = pd.DataFrame
# A fake schema maps table names to their DataFrame.
FakeSchema = dict[str, pd.DataFrame]
# A set of fake schemas, keyed by schema name.
FakeSchemas = dict[str, FakeSchema]
class FakeRepository(AbstractRepository):
    """In-memory repository used as a test double.

    Every table is stored as ``{"df": DataFrame, "metadata": {...}}`` where
    the metadata tracks a status string and read / write counters.
    """

    def __init__(self, schemas: FakeSchemas):
        """Wrap each given DataFrame with a fresh metadata record."""
        self._schemas = {}
        for schema_name, tables in schemas.items():
            self._schemas[schema_name] = {
                table: {
                    "df": df,
                    "metadata": {
                        "status": "new",
                        "qty_read": 0,
                        "qty_write": 0,
                    },
                }
                for table, df in tables.items()
            }

    def schemas(self):
        """Return the schema names, in insertion order."""
        return list(self._schemas)

    def tables(self, schema):
        """Return the names of the tables stored in ``schema``."""
        return list(self._schemas[schema])

    def infos(self, table: str, schema: str) -> dict[str, str]:
        """Return the metadata record of the table (the stored dict itself)."""
        entry = self._schemas[schema][table]
        return entry["metadata"]

    def read(self, table, schema) -> pd.DataFrame:
        """Return the table's DataFrame and count one more read."""
        entry = self._schemas[schema][table]
        entry["metadata"]["qty_read"] += 1
        return entry["df"]

    def write(self, content, table, schema) -> dict[str, str]:
        """Replace the table's DataFrame and return its updated metadata.

        The table must already exist; otherwise a ``KeyError`` is raised.
        """
        entry = self._schemas[schema][table]
        entry["df"] = content
        metadata = entry["metadata"]
        metadata["status"] = "modified"
        metadata["qty_write"] += 1
        return self.infos(table, schema)

    def delete_table(self, table, schema):
        """Deletion is not supported by the fake repository."""
        raise NotImplementedError
def test_fakerepository():
    """Check listing, metadata tracking, read and write of FakeRepository."""
    repo = FakeRepository(
        {
            "foo": {
                "table1": pd.DataFrame({"A": []}),
                "table2": pd.DataFrame({"B": []}),
            },
            "bar": {
                "table1": pd.DataFrame({"C": []}),
                "table2": pd.DataFrame({"D": []}),
            },
        }
    )

    # Listings follow the insertion order of the constructor argument.
    assert repo.schemas() == ["foo", "bar"]
    assert repo.tables("foo") == ["table1", "table2"]

    # A freshly created table has never been read nor written.
    assert repo.infos("table1", "foo") == {
        "status": "new",
        "qty_read": 0,
        "qty_write": 0,
    }

    # Reading returns the stored frame and bumps only the read counter.
    assert repo.read("table1", "foo").equals(pd.DataFrame({"A": []}))
    assert repo.infos("table1", "foo") == {
        "status": "new",
        "qty_read": 1,
        "qty_write": 0,
    }

    # Writing flips the status, bumps the write counter and returns the
    # metadata record.
    df = pd.DataFrame({"A": [1, 2]})
    assert repo.write(df, "table1", "foo") == {
        "status": "modified",
        "qty_read": 1,
        "qty_write": 1,
    }