Compare commits
2 Commits
2de0e5ef5c
...
959b53e6a0
Author | SHA1 | Date | |
---|---|---|---|
959b53e6a0 | |||
91e229eab2 |
0
dashboard/libs/flux/__init__.py
Normal file
0
dashboard/libs/flux/__init__.py
Normal file
70
dashboard/libs/flux/flux.py
Normal file
70
dashboard/libs/flux/flux.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
from collections.abc import Callable
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from ..repository.repository import AbstractRepository
|
||||||
|
|
||||||
|
|
||||||
|
class Schema(BaseModel):
    """Location of a schema inside a named repository."""

    # NOTE(review): the field name `schema` shadows pydantic's built-in
    # BaseModel.schema() helper — confirm this is intentional.
    repository: str
    schema: str
|
||||||
|
|
||||||
|
|
||||||
|
class Table(BaseModel):
    """Fully-qualified location of a table: repository / schema / table."""

    repository: str
    schema: str
    table: str
|
||||||
|
|
||||||
|
|
||||||
|
class Flux(BaseModel):
    """A data flow: read sources, transform them, write destinations."""

    sources: list[Table]
    destinations: dict[str, Table]
    # Maps the list of source frames to one frame per destination key.
    transformation: Callable[[list[pd.DataFrame]], dict[str, pd.DataFrame]]
|
||||||
|
|
||||||
|
|
||||||
|
class State(BaseModel):
    """Outcome of one flux run: per-destination statuses and timing."""

    statuses: dict[str, str]   # destination name -> repository write status
    qty_out: int               # number of lines written out
    failed_lines: list[str]
    start: datetime
    end: datetime
|
||||||
|
|
||||||
|
|
||||||
|
# Named repositories available to a flux run (name -> repository instance).
Repositories = dict[str, AbstractRepository]
|
||||||
|
|
||||||
|
|
||||||
|
def open_source(repositories: Repositories, source: Table) -> pd.DataFrame:
    """Read *source*'s table from its repository and return it as a DataFrame."""
    repository = repositories[source.repository]
    return repository.read(source.table, source.schema)
|
||||||
|
|
||||||
|
|
||||||
|
def write_source(
    content: pd.DataFrame, repositories: Repositories, destination: Table
) -> str:
    """Write *content* into *destination* and return the repository's status."""
    repository = repositories[destination.repository]
    return repository.write(content, destination.table, destination.schema)
|
||||||
|
|
||||||
|
|
||||||
|
def consume_flux(flux: Flux, repositories: dict[str, AbstractRepository]) -> State:
    """Run *flux* end to end and return a State describing the run.

    Reads every source table, applies the flux's transformation, writes each
    produced frame to its named destination, and records the per-destination
    write statuses together with start/end timestamps.
    """
    started_at = datetime.now()

    source_frames = [open_source(repositories, src) for src in flux.sources]
    produced = flux.transformation(source_frames)
    statuses = {
        name: write_source(frame, repositories, flux.destinations[name])
        for name, frame in produced.items()
    }

    finished_at = datetime.now()
    # NOTE(review): qty_out and failed_lines are placeholders — nothing
    # computes them yet, so they are reported as 0 / empty.
    return State(
        statuses=statuses,
        qty_out=0,
        failed_lines=[],
        start=started_at,
        end=finished_at,
    )
|
@ -1,35 +0,0 @@
|
|||||||
from .schema import AbstractSchema
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
class FSSchema(AbstractSchema):
    """Schema backed by the filesystem: a schema is a directory whose
    entries are the tables."""

    def __init__(self, basepath, metadata_engine=None):
        # Root directory of the schema; metadata engine is optional.
        self.basepath = basepath
        self._metadata_engine = metadata_engine

    def ls(self, dir, only_files=True):
        """List entries of *dir*.

        With only_files=True only regular files are returned; otherwise
        every entry (files and directories) is returned.
        """
        dirpath = Path(dir)
        if only_files:
            # BUG FIX: the previous filter kept directories (is_dir()),
            # contradicting the flag's name; keep regular files instead.
            return [f for f in dirpath.iterdir() if f.is_file()]
        return list(dirpath.iterdir())

    def tables(self, dir, only_files=True):
        """List tables (files) in *dir* — same listing semantics as ls()."""
        # Was a verbatim copy of ls(); delegate instead.
        return self.ls(dir, only_files)

    def info(self, path):
        """Get infos about a file. Not implemented yet."""
        path = Path(path)
        pass

    def read(self, path):
        """Get content of a file. Not implemented yet."""
        path = Path(path)
        pass

    def write(self, path, content):
        """Write content into the file. Not implemented yet."""
        path = Path(path)
        pass

    def delete(self, path):
        """Delete the file. Not implemented yet."""
        path = Path(path)
        pass
|
|
@ -55,7 +55,7 @@ class FSRepository(AbstractRepository):
|
|||||||
|
|
||||||
raise ValueError("Can't open the table")
|
raise ValueError("Can't open the table")
|
||||||
|
|
||||||
def write(self, content, table: str, schema: str = "."):
    """Write *content* into (schema, table). Not implemented yet."""
    # Resolve the on-disk location for the target table.
    table_path = self.build_table_path(table, schema)
    pass
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import abc
|
import abc
|
||||||
|
|
||||||
from .metadata import AbstractMetadataEngine
|
from .metadata import AbstractMetadataEngine
|
||||||
|
|
||||||
|
|
||||||
@ -6,31 +7,31 @@ class AbstractRepository(abc.ABC):
|
|||||||
metadata_engine = AbstractMetadataEngine
|
metadata_engine = AbstractMetadataEngine
|
||||||
|
|
||||||
@abc.abstractmethod
def schemas(self) -> list[str]:
    """Return the names of every schema in the repository."""
    raise NotImplementedError
|
||||||
|
|
||||||
@abc.abstractmethod
def tables(self, schema) -> list[str]:
    """Return the names of the tables stored in *schema*."""
    raise NotImplementedError
|
||||||
|
|
||||||
@abc.abstractmethod
def infos(self, table: str, schema: str) -> dict[str, str]:
    """Return metadata about *table* within *schema*."""
    raise NotImplementedError
|
||||||
|
|
||||||
@abc.abstractmethod
def read(self, table: str, schema: str):
    """Return the content of *table* within *schema*."""
    raise NotImplementedError
|
||||||
|
|
||||||
@abc.abstractmethod
def write(self, content, table: str, schema: str):
    """Write *content* into *table* within *schema*."""
    raise NotImplementedError
|
||||||
|
|
||||||
@abc.abstractmethod
def delete_table(self, table: str, schema: str):
    """Delete *table* from *schema*."""
    raise NotImplementedError
|
||||||
|
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
88
tests/test_flux.py
Normal file
88
tests/test_flux.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from dashboard.libs.repository.repository import AbstractRepository
|
||||||
|
|
||||||
|
# Shorthand types describing the fake repository's in-memory layout.
FakeTable = pd.DataFrame
FakeSchema = dict[str, pd.DataFrame]  # table name -> frame
FakeSchemas = dict[str, FakeSchema]   # schema name -> tables
|
||||||
|
|
||||||
|
|
||||||
|
class FakeRepository(AbstractRepository):
    """In-memory repository double that tracks read/write counts per table."""

    def __init__(self, schemas: FakeSchemas):
        # schema name -> table name -> {"df": frame, "metadata": counters}
        self._schemas = {
            schema_name: {
                table: {
                    "df": df,
                    "metadata": {
                        "status": "new",
                        "qty_read": 0,
                        "qty_write": 0,
                    },
                }
                for table, df in tables.items()
            }
            for schema_name, tables in schemas.items()
        }

    def schemas(self):
        """List schemas"""
        return list(self._schemas)

    def tables(self, schema):
        """List table's name in schema"""
        return list(self._schemas[schema])

    def infos(self, table: str, schema: str) -> dict[str, str]:
        """Get infos about the table"""
        return self._schemas[schema][table]["metadata"]

    def read(self, table, schema) -> pd.DataFrame:
        """Get content of the table"""
        entry = self._schemas[schema][table]
        entry["metadata"]["qty_read"] += 1
        return entry["df"]

    def write(self, content, table, schema) -> dict[str, str]:
        """Write content into the table"""
        entry = self._schemas[schema][table]
        entry["df"] = content
        entry["metadata"]["status"] = "modified"
        entry["metadata"]["qty_write"] += 1
        return self.infos(table, schema)

    def delete_table(self, table, schema):
        """Delete the table"""
        raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
def test_fakerepository():
    """FakeRepository lists schemas/tables and tracks reads and writes."""
    repo = FakeRepository(
        {
            "foo": {
                "table1": pd.DataFrame({"A": []}),
                "table2": pd.DataFrame({"B": []}),
            },
            "bar": {
                "table1": pd.DataFrame({"C": []}),
                "table2": pd.DataFrame({"D": []}),
            },
        }
    )

    assert repo.schemas() == ["foo", "bar"]
    assert repo.tables("foo") == ["table1", "table2"]

    # Fresh table: untouched counters.
    assert repo.infos("table1", "foo") == {
        "status": "new",
        "qty_read": 0,
        "qty_write": 0,
    }

    # Reading returns the stored frame and bumps qty_read only.
    assert repo.read("table1", "foo").equals(pd.DataFrame({"A": []}))
    assert repo.infos("table1", "foo") == {
        "status": "new",
        "qty_read": 1,
        "qty_write": 0,
    }

    # Writing flips the status and bumps qty_write.
    df = pd.DataFrame({"A": [1, 2]})
    assert repo.write(df, "table1", "foo") == {
        "status": "modified",
        "qty_read": 1,
        "qty_write": 1,
    }
|
Loading…
Reference in New Issue
Block a user