Feat: test on pandas xlsx and ods file reader

Feat: start testing fs_repository
Feat: test consume_flux
2024-10-07 05:27:46 +02:00 · 2024-08-14 10:44:38 +02:00 · 2024-08-14 07:41:36 +02:00 · 2024-08-14 07:22:01 +02:00 · 2024-08-14 07:21:36 +02:00 · 2024-08-07 11:39:33 +02:00
37 changed files with 150 additions and 759 deletions
--- a/66
+++ b/66
@@ -0,0 +1,66 @@
+DATA_BASE=./datas
+
+PDF_BASE=$(DATA_BASE)/pdfs
+PDF_YEARS=$(wildcard $(PDF_BASE)/*)
+
+RAW_BASE=$(DATA_BASE)/raw
+RAW_CRG=$(RAW_BASE)/CRG
+RAW_CRG_YEARS=$(subst $(PDF_BASE), $(RAW_CRG), $(PDF_YEARS))
+
+
+$(RAW_CRG)/%/: $(wildcard $(PDF_BASE)/%/*)
+	echo $(wildcard $(PDF_BASE)/$*/*)
+	@echo ----
+	ls $(PDF_BASE)/$*/
+	@echo ----
+	echo $*
+	@echo ----
+	echo $^
+	@echo ----
+	echo $?
+
+#./datas/raw/CRG/%: 
+#pdf-oralia extract all --src $$year --dest $$(subst $$PDF_BASE, $$RAW_CRG, $$year)
+# $(RAW_CRG_YEARS): $(PDF_PATHS)
+# 	for year in $(PDF_PATHS); do \
+# 		echo $$year; \
+# 		echo $$(subst $$PDF_BASE, $$RAW_CRG, $$year); \
+# 		echo "----"; \
+# 	done;
+
+extract_pdfs:
+	for year in 2021 2022 2023 2024; do \
+		mkdir -p $(RAW_CRG)/$$year/extracted;\
+		pdf-oralia extract all --src $(PDF_BASE)/$$year/ --dest $(RAW_CRG)/$$year/extracted; \
+		pdf-oralia join --src $(RAW_CRG)/$$year/extracted/ --dest $(RAW_CRG)/$$year/; \
+	done
+
+clean_raw:
+	rm -rf ./PLESNA Compta SYSTEM/raw/**/*.csv
+
+clean_built:
+	rm -rf $(DATA_BASE)/staging/**/*.csv
+	rm -rf $(DATA_BASE)/gold/**/*.csv
+	rm -rf $(DATA_BASE)/datamart/**/*.csv
+	rm -rf $(DATA_BASE)/datamart/**/*.xlsx
+
+run_ingest:
+	python -m scripts ingest
+
+run_feature:
+	python -m scripts feature
+
+run_datamart:
+	python -m scripts datamart
+
+build: clean_built run_ingest run_feature run_datamart
+
+clean_all: clean_built clean_raw
+
+import_nextcloud:
+	rsync -av ~/Nextcloud/PLESNA\ Compta\ SYSTEM/Histoire/ ./datas/Histoire
+
+push_nextcloud:
+	rsync -av ./datas/datamart/ ~/Nextcloud/PLESNA\ Compta\ SYSTEM/DataMart
+
+
--- a/README.md
+++ b/README.md
@@ -1,15 +1,5 @@
 # E(T)LT pour Plesna

-## Installation
-
-## Concepts
-
- `dataplatform`: agrégation d'un datacatalogue, de moteur de compute et du dag des transformations.
- `datacatalogue`: gestion du contenu des datastores.
- `datastore`: interface de stockage des données.
- `compute`: moteur de traitement des fluxs.
- `graph/dag`: organisation logique des fluxs et des données.
-
 ## Stages

 - Raw: fichiers les plus brutes possibles
--- a/plesna/compute/init.py
+++ b/plesna/compute/init.py
--- a/plesna/compute/consume_flux.py
+++ b/plesna/compute/consume_flux.py
@@ -1,8 +0,0 @@
-from plesna.models.flux import Flux, FluxMetaData
-
-
-def consume_flux(flux: Flux) -> FluxMetaData:
-    metadata = flux.transformation.function(
-        sources=flux.sources, targets=flux.targets, **flux.transformation.extra_kwrds
-    )
-    return FluxMetaData(data=metadata)
--- a/plesna/dataplatform.py
+++ b/plesna/dataplatform.py
@@ -1,27 +0,0 @@
-from plesna.datastore.datacatalogue import DataCatalogue
-from plesna.graph.graph_set import GraphSet
-
-
-class DataPlateformError(Exception):
-    pass
-
-
-class DataPlateform:
-    def __init__(self):
-        self._graphset = GraphSet()
-        self._metadata_engine = ""
-        self._transformations = {}
-        self._datacatalogues = {}
-
-    def add_datacatalague(self, name: str, datacatalogue: DataCatalogue):
-        if name in self._datacatalogues:
-            raise DataPlateformError("The datacatalogue {name} already exists")
-
-        self._datacatalogues[name] = datacatalogue
-
-    @property
-    def datacatalogues(self):
-        return list(self._datacatalogues)
-
-    def get_datacatalogue(self, name: str):
-        return self._datacatalogues[name]
--- a/plesna/datastore/init.py
+++ b/plesna/datastore/init.py
--- a/plesna/datastore/datacatalogue.py
+++ b/plesna/datastore/datacatalogue.py
@@ -1,34 +0,0 @@
-import abc
-
-from plesna.models.storage import Schema, Table
-
-
-class DataCatalogue:
-    def __init__(self):
-        pass
-
-    @property
-    @abc.abstractmethod
-    def schemas(self) -> list[str]:
-        """List schema's names"""
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def schema(self, name: str) -> Schema:
-        """Get the schema properties"""
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def tables(self, schema:str) -> list[str]:
-        """List table's name in schema"""
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def table(self, schema:str, table:str) -> Table:
-        """Get the table properties"""
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def infos(self, table: str, schema: str) -> dict[str, str]:
-        """Get infos about the table"""
-        raise NotImplementedError
--- a/plesna/datastore/datastore.py
+++ b/plesna/datastore/datastore.py
@@ -1,3 +0,0 @@
-class DataStore:
-    def __init__(self, name):
-        self._name
--- a/plesna/datastore/fake_datacatalogue.py
+++ b/plesna/datastore/fake_datacatalogue.py
@@ -1,81 +0,0 @@
-from pathlib import Path
-
-from pydantic import BaseModel, computed_field
-
-from plesna.models.storage import Schema, Table
-
-from .datacatalogue import DataCatalogue
-
-
-class FakeSchema(BaseModel):
-    name: str
-
-    @computed_field
-    @property
-    def ref(self) -> Schema:
-        return Schema(
-            id=str(self.name),
-            value=str(self.name),
-        )
-
-
-class FakeTable(BaseModel):
-    name: str
-    data: dict[str, list]
-
-    @computed_field
-    @property
-    def ref(self) -> Table:
-        return Table(
-            id=str(self.name),
-            value=str(self.name),
-        )
-
-
-class FakeDataCatalogue(DataCatalogue):
-    """DataCatalogue based on dictionnaries"""
-
-    def __init__(self, name: str):
-        self.name = name
-
-    def ls(
-        self, dir="", only_files=False, only_directories=False, recursive=False
-    ) -> list[str]:
-        dirpath = self._basepath / dir
-
-        if only_files:
-            return [
-                str(f.relative_to(dirpath))
-                for f in dirpath.iterdir()
-                if not f.is_dir() and not str(f).startswith(".")
-            ]
-
-        if only_directories:
-            if recursive:
-                return [
-                    str(f[0].relative_to(dirpath))
-                    for f in dirpath.walk()
-                    if not str(f).startswith(".")
-                ]
-
-            return [
-                str(f.relative_to(dirpath))
-                for f in dirpath.iterdir()
-                if f.is_dir() and not str(f).startswith(".")
-            ]
-
-        return [
-            str(f.relative_to(dirpath))
-            for f in dirpath.iterdir()
-            if not str(f).startswith(".")
-        ]
-
-    def schemas(self) -> dict[str, FSSchema]:
-        """List schemas (sub directories within basepath)"""
-        subdirectories = self.ls("", only_directories=True, recursive=True)
-        return {str(path): FSSchema(path=path) for path in subdirectories}
-
-    def tables(self, schema_id=".") -> dict[str, FSTable]:
-        """List table in schema (which are files in the directory)"""
-        schema_path = schema_id
-        return {path: FSTable(path=path) for path in self.ls(schema_path, only_files=True)}
--- a/plesna/datastore/fs_datacatalogue.py
+++ b/plesna/datastore/fs_datacatalogue.py
@@ -1,91 +0,0 @@
-from pathlib import Path
-
-from pydantic import BaseModel, computed_field
-
-from plesna.models.storage import Schema, Table
-
-from .datacatalogue import DataCatalogue
-
-
-class FSTable(BaseModel):
-    path: Path
-
-    @computed_field
-    @property
-    def ref(self) -> Table:
-        return Table(
-            id=str(self.path),
-            value=str(self.path),
-        )
-
-
-class FSSchema(BaseModel):
-    path: Path
-    tables: list[str]
-
-    @computed_field
-    @property
-    def ref(self) -> Schema:
-        return Schema(
-            id=str(self.path),
-            value=str(self.path),
-        )
-
-
-
-class FSDataCatalogue(DataCatalogue):
-    """DataCatalogue based on files tree structure"""
-
-    def __init__(self, name: str, basepath: str = "."):
-        self._basepath = Path(basepath)
-        self.name = name
-
-        assert self._basepath.exists()
-
-    def ls(
-        self, dir="", only_files=False, only_directories=False, recursive=False
-    ) -> list[str]:
-        dirpath = self._basepath / dir
-
-        if only_files:
-            return [
-                str(f.relative_to(dirpath))
-                for f in dirpath.iterdir()
-                if not f.is_dir() and not str(f).startswith(".")
-            ]
-
-        if only_directories:
-            if recursive:
-                return [
-                    str(f[0].relative_to(dirpath))
-                    for f in dirpath.walk()
-                    if not str(f).startswith(".")
-                ]
-
-            return [
-                str(f.relative_to(dirpath))
-                for f in dirpath.iterdir()
-                if f.is_dir() and not str(f).startswith(".")
-            ]
-
-        return [
-            str(f.relative_to(dirpath))
-            for f in dirpath.iterdir()
-            if not str(f).startswith(".")
-        ]
-
-    @property
-    def schemas(self) -> list[str]:
-        """List schemas (sub directories within basepath)"""
-        subdirectories = self.ls("", only_directories=True, recursive=True)
-        return [str(d) for d in subdirectories]
-
-    def schema(self, schema: str) -> FSSchema:
-        """List schemas (sub directories within basepath)"""
-        tables = self.ls(schema, only_files=True)
-        return FSSchema(path=Path(schema), tables=tables)
-
-    def table(self, schema: str, table:str) -> FSTable:
-        """List table in schema (which are files in the directory)"""
-        schema_path = schema_id
-        return {path: FSTable(path=path) for path in self.ls(schema_path, only_files=True)}
--- a/plesna/graph/init.py
+++ b/plesna/graph/init.py
--- a/plesna/graph/graph.py
+++ b/plesna/graph/graph.py
@@ -1,98 +0,0 @@
-from functools import reduce
-from typing import Callable
-
-from pydantic import BaseModel
-
-
-class Node(BaseModel):
-    name: str
-    infos: dict = {}
-
-    def __hash__(self):
-        return hash(self.name)
-
-
-class Edge(BaseModel):
-    arrow_name: str
-    source: Node
-    target: Node
-    edge_kwrds: dict = {}
-
-
-class Graph:
-    def __init__(self, nodes: list[Node] = [], edges: list[Edge] = []):
-        self._edges = []
-        self._nodes = set()
-        self.add_edges(edges)
-        self.add_nodes(nodes)
-
-    def add_node(self, node: Node):
-        self._nodes.add(node)
-
-    def add_nodes(self, nodes: list[Node]):
-        for node in nodes:
-            self.add_node(node)
-
-    def add_edge(self, edge: Edge):
-        self._edges.append(edge)
-        self.add_node(edge.source)
-        self.add_node(edge.target)
-
-    def add_edges(self, edges: list[Edge]):
-        for edge in edges:
-            self.add_edge(edge)
-
-    @property
-    def nodes(self):
-        return self._nodes
-
-    @property
-    def edges(self):
-        return self._edges
-
-    def get_edges_from(self, node: Node) -> list[Edge]:
-        """Get all edges which have the node as source"""
-        return [edge for edge in self._edges if edge.source == node]
-
-    def get_edges_to(self, node: Node) -> list[Edge]:
-        """Get all edges which have the node as target"""
-        return [edge for edge in self._edges if edge.target == node]
-
-    def get_direct_targets_from(self, node: Node) -> set[Node]:
-        """Get direct nodes that are accessible from the node"""
-        return set(edge.target for edge in self._edges if edge.source == node)
-
-    def get_targets_from(self, node: Node) -> set[Node]:
-        """Get all nodes that are accessible from the node
-
-        If the graph have a loop, the procedure be in an infinite loop!
-
-        """
-        direct_targets = self.get_direct_targets_from(node)
-        undirect_targets = [self.get_targets_from(n) for n in direct_targets]
-        undirect_targets = reduce(lambda x, y: x.union(y), undirect_targets, set())
-
-        return direct_targets.union(undirect_targets)
-
-    def get_direct_sources_from(self, node: Node) -> set[Node]:
-        """Get direct nodes that are targeted the node"""
-        return set(edge.source for edge in self._edges if edge.target == node)
-
-    def get_sources_from(self, node: Node) -> set[Node]:
-        """Get all nodes that are targeted the node"""
-        direct_sources = self.get_direct_sources_from(node)
-        undirect_sources = [self.get_sources_from(n) for n in direct_sources]
-        undirect_sources = reduce(lambda x, y: x.union(y), undirect_sources, set())
-
-        return direct_sources.union(undirect_sources)
-
-    def is_dag(self) -> bool:
-        visited = set()
-        for node in self._nodes:
-            if node not in visited:
-                try:
-                    targets = self.get_targets_from(node)
-                except RecursionError:
-                    return False
-                visited.union(targets)
-        return True
--- a/plesna/graph/graph_set.py
+++ b/plesna/graph/graph_set.py
@@ -1,36 +0,0 @@
-from typing import Callable
-
-from pydantic import BaseModel
-
-
-class Node(BaseModel):
-    name: str
-    infos: dict = {}
-
-    def __hash__(self):
-        return hash(self.name)
-
-
-class EdgeOnSet(BaseModel):
-    arrow: Callable
-    sources: dict[str, Node]
-    targets: dict[str, Node]
-    edge_kwrds: dict = {}
-
-
-class GraphSet:
-    def __init__(self):
-        self._edges = []
-        self._node_sets = set()
-
-    def append(self, edge: EdgeOnSet):
-        self._edges.append(edge)
-        self._node_sets.add(frozenset(edge.sources.values()))
-        self._node_sets.add(frozenset(edge.targets.values()))
-
-    @property
-    def node_sets(self):
-        return self._node_sets
-
-    def is_valid_dag(self):
-        pass
--- a/plesna/models/init.py
+++ b/plesna/models/init.py
--- a/plesna/models/flux.py
+++ b/plesna/models/flux.py
@@ -1,14 +0,0 @@
-from pydantic import BaseModel
-
-from plesna.models.storage import Table
-from plesna.models.transformation import Transformation
-
-
-class Flux(BaseModel):
-    sources: dict[str, Table]
-    targets: dict[str, Table]
-    transformation: Transformation
-
-
-class FluxMetaData(BaseModel):
-    data: dict
--- a/plesna/models/storage.py
+++ b/plesna/models/storage.py
@@ -1,25 +0,0 @@
-from pydantic import BaseModel
-
-
-class Schema(BaseModel):
-    """Logical agregation for Table
-
-    id: uniq identifier for the schema
-    value: string which describe where to find the schema in the storage system
-
-    """
-
-    id: str
-    value: str
-
-
-class Table(BaseModel):
-    """Place where data are stored
-
-    id: uniq identifier for the table
-    value: string which describe where to find the table in the storage system
-
-    """
-
-    id: str
-    value: str
--- a/plesna/models/transformation.py
+++ b/plesna/models/transformation.py
@@ -1,15 +0,0 @@
-from collections.abc import Callable
-
-from pydantic import BaseModel
-
-
-class Transformation(BaseModel):
-    """
-    The function have to have at least 2 arguments: sources and targets
-    Other arguments will came throught extra_kwrds
-
-    The function will have to return metadata as dict
-    """
-
-    function: Callable
-    extra_kwrds: dict = {}
--- a/tests/compute/init.py
+++ b/tests/compute/init.py
--- a/tests/compute/test_consume_flux.py
+++ b/tests/compute/test_consume_flux.py
@@ -1,35 +0,0 @@
-from plesna.compute.consume_flux import consume_flux
-from plesna.models.flux import Flux
-from plesna.models.storage import Table
-from plesna.models.transformation import Transformation
-
-
-def test_consume_flux():
-    sources = {
-        "src1": Table(id="src1", value="here"),
-        "src2": Table(id="src2", value="here"),
-    }
-    targets = {
-        "tgt1": Table(id="tgt1", value="this"),
-        "tgt2": Table(id="tgt2", value="that"),
-    }
-
-    def func(sources, targets, **kwrds):
-        return {
-            "sources": len(sources),
-            "targets": len(targets),
-            "kwrds": len(kwrds),
-        }
-
-    flux = Flux(
-        sources=sources,
-        targets=targets,
-        transformation=Transformation(function=func, extra_kwrds={"extra": "super"}),
-    )
-
-    meta = consume_flux(flux)
-    assert meta.data == {
-        "sources": 2,
-        "targets": 2,
-        "kwrds": 1,
-    }
--- a/tests/dataplatform/test_dataplateform.py
+++ b/tests/dataplatform/test_dataplateform.py
@@ -1,43 +0,0 @@
-from pathlib import Path
-
-import pytest
-
-from plesna.dataplatform import DataPlateform
-from plesna.datastore.fs_datacatalogue import FSDataCatalogue
-
-FIXTURE_DIR = Path(__file__).parent / Path("raw_data")
-
-
-@pytest.fixture
-def raw_catalogue(tmp_path):
-    raw_path = Path(tmp_path) / "raw"
-    raw_path.mkdir()
-    return FSDataCatalogue("raw", raw_path)
-
-
-@pytest.fixture
-def bronze_catalogue(tmp_path):
-    bronze_path = Path(tmp_path) / "bronze"
-    bronze_path.mkdir()
-    return FSDataCatalogue("bronze", bronze_path)
-
-
-@pytest.fixture
-def silver_catalogue(tmp_path):
-    silver_path = Path(tmp_path) / "silver"
-    silver_path.mkdir()
-    return FSDataCatalogue("silver", silver_path)
-
-
-def test_add_catalogue(
-    raw_catalogue: FSDataCatalogue,
-    bronze_catalogue: FSDataCatalogue,
-    silver_catalogue: FSDataCatalogue,
-):
-    dp = DataPlateform()
-    dp.add_datacatalague("raw", raw_catalogue)
-    dp.add_datacatalague("bronze", bronze_catalogue)
-    dp.add_datacatalague("silver", silver_catalogue)
-
-    assert dp.datacatalogues == ["raw", "bronze", "silver"]
-    assert dp.get_datacatalogue("raw") == raw_catalogue
--- a/tests/datastore/init.py
+++ b/tests/datastore/init.py
--- a/tests/datastore/test_fs_datacatalogue.py
+++ b/tests/datastore/test_fs_datacatalogue.py
@@ -1,61 +0,0 @@
-import shutil
-from pathlib import Path
-
-import pytest
-
-from plesna.datastore.fs_datacatalogue import FSDataCatalogue
-from plesna.models.storage import Schema
-
-FIXTURE_DIR = Path(__file__).parent.parent / Path("./raw_datas/")
-
-
-@pytest.fixture
-def location(tmp_path):
-    loc = tmp_path
-    username_loc = loc / "username"
-    username_loc.mkdir()
-    salary_loc = loc / "salary"
-    salary_loc.mkdir()
-    example_src = FIXTURE_DIR
-    assert example_src.exists()
-
-    for f in example_src.glob("*"):
-        if "username" in str(f):
-            shutil.copy(f, username_loc)
-        else:
-            shutil.copy(f, salary_loc)
-
-    return loc
-
-
-def test_init(location):
-    repo = FSDataCatalogue("example", location)
-    assert repo.ls() == [
-        "username",
-        "salary",
-    ]
-
-    assert repo.ls(recursive=True) == [
-        "username",
-        "salary",
-    ]
-
-
-def test_list_schema(location):
-    repo = FSDataCatalogue("example", location)
-
-    assert repo.schemas == [".", "username", "salary"]
-    assert repo.schema(".").ref == Schema(id=".", value=".")
-    assert repo.schema("username").ref == Schema(id="username", value="username")
-
-def test_list_tables_schema(location):
-    repo = FSDataCatalogue("example", location)
-
-    assert repo.schema(".").tables == []
-    assert repo.schema("username").tables == [
-                'username.csv',
-                'username-password-recovery-code.xlsx',
-                'username-password-recovery-code.xls',
-            ]
-    assert repo.schema("salary").tables == ["salary.pdf"]
-
--- a/tests/e2e/test_datalake.py
+++ b/tests/e2e/test_datalake.py
@@ -1,39 +0,0 @@
-from pathlib import Path
-
-import pytest
-
-from plesna.dataplatform import DataPlateform
-from plesna.datastore.fs_datacatalogue import FSDataCatalogue
-
-FIXTURE_DIR = Path(__file__).parent / Path("raw_data")
-
-
-@pytest.fixture
-def raw_catalogue(tmp_path):
-    raw_path = Path(tmp_path) / "raw"
-    return FSDataCatalogue(raw_path)
-
-
-@pytest.fixture
-def bronze_catalogue(tmp_path):
-    bronze_path = Path(tmp_path) / "bronze"
-    return FSDataCatalogue(bronze_path)
-
-
-@pytest.fixture
-def silver_catalogue(tmp_path):
-    silver_path = Path(tmp_path) / "silver"
-    return FSDataCatalogue(silver_path)
-
-
-@pytest.fixture
-def dataplateform(
-    raw_catalogue: FSDataCatalogue,
-    bronze_catalogue: FSDataCatalogue,
-    silver_catalogue: FSDataCatalogue,
-):
-    dp = DataPlateform()
-    dp.add_datacatalague("raw", raw_catalogue)
-    dp.add_datacatalague("bronze", bronze_catalogue)
-    dp.add_datacatalague("silver", silver_catalogue)
-    pass
--- a/tests/graphs/init.py
+++ b/tests/graphs/init.py
--- a/tests/graphs/test_graph.py
+++ b/tests/graphs/test_graph.py
@@ -1,107 +0,0 @@
-import pytest
-
-from plesna.graph.graph import Edge, Graph, Node
-
-
-def test_append_nodess():
-    nodeA = Node(name="A")
-    nodeB = Node(name="B")
-
-    graph = Graph()
-    graph.add_node(nodeA)
-    graph.add_node(nodeB)
-
-    assert graph.nodes == {nodeA, nodeB}
-
-
-def test_append_edges():
-    nodeA = Node(name="A")
-    nodeB = Node(name="B")
-    nodeC = Node(name="C")
-
-    edge1 = Edge(arrow_name="arrow", source=nodeA, target=nodeC)
-    edge2 = Edge(arrow_name="arrow", source=nodeB, target=nodeC)
-
-    graph = Graph()
-    graph.add_edge(edge1)
-    graph.add_edge(edge2)
-
-    assert graph.nodes == {nodeA, nodeB, nodeC}
-
-
-def test_init_edges_nodes():
-    nodeA = Node(name="A")
-    nodeB = Node(name="B")
-    nodeC = Node(name="C")
-
-    edge1 = Edge(arrow_name="arrow", source=nodeB, target=nodeC)
-
-    graph = Graph()
-    graph.add_node(nodeA)
-    graph.add_edge(edge1)
-
-    assert graph.nodes == {nodeA, nodeB, nodeC}
-
-
-@pytest.fixture
-def nodes():
-    return {
-        "A": Node(name="A"),
-        "B": Node(name="B"),
-        "C": Node(name="C"),
-        "D": Node(name="D"),
-    }
-
-
-@pytest.fixture
-def dag_edges(nodes):
-    return {
-        "1": Edge(arrow_name="arrow", source=nodes["A"], target=nodes["C"]),
-        "2": Edge(arrow_name="arrow", source=nodes["B"], target=nodes["C"]),
-        "3": Edge(arrow_name="arrow", source=nodes["C"], target=nodes["D"]),
-    }
-
-
-@pytest.fixture
-def notdag_edges(nodes):
-    return {
-        "1": Edge(arrow_name="arrow", source=nodes["A"], target=nodes["C"]),
-        "2": Edge(arrow_name="arrow", source=nodes["B"], target=nodes["C"]),
-        "3": Edge(arrow_name="arrow", source=nodes["C"], target=nodes["D"]),
-        "4": Edge(arrow_name="arrow", source=nodes["D"], target=nodes["B"]),
-    }
-
-
-def test_get_edges_from(nodes, dag_edges):
-    edges = dag_edges
-    graph = Graph(edges=edges.values())
-    assert graph.get_edges_from(nodes["A"]) == [edges["1"]]
-
-
-def test_get_targets_from(nodes, dag_edges):
-    edges = dag_edges
-    graph = Graph(edges=edges.values())
-    assert graph.get_direct_targets_from(nodes["A"]) == set([nodes["C"]])
-    assert graph.get_direct_targets_from(nodes["C"]) == set([nodes["D"]])
-    assert graph.get_direct_targets_from(nodes["D"]) == set()
-    assert graph.get_targets_from(nodes["A"]) == set([nodes["C"], nodes["D"]])
-
-
-def test_get_sources_from(nodes, dag_edges):
-    edges = dag_edges
-    graph = Graph(edges=edges.values())
-    assert graph.get_direct_sources_from(nodes["A"]) == set()
-    assert graph.get_direct_sources_from(nodes["C"]) == set([nodes["A"], nodes["B"]])
-    assert graph.get_direct_sources_from(nodes["D"]) == set([nodes["C"]])
-
-    assert graph.get_sources_from(nodes["D"]) == set(
-        [nodes["A"], nodes["B"], nodes["C"]]
-    )
-
-
-def test_valid_dage(dag_edges, notdag_edges):
-    graph = Graph(edges=dag_edges.values())
-    assert graph.is_dag()
-
-    graph = Graph(edges=notdag_edges.values())
-    assert not graph.is_dag()
--- a/tests/graphs/test_graph_set.py
+++ b/tests/graphs/test_graph_set.py
@@ -1,18 +0,0 @@
-from plesna.graph.graph_set import EdgeOnSet, GraphSet, Node
-
-
-def test_init():
-    nodeA = Node(name="A")
-    nodeB = Node(name="B")
-    nodeC = Node(name="C")
-
-    def arrow(sources, targets):
-        targets["C"].infos["res"] = sources["A"].name + sources["B"].name
-
-    edge1 = EdgeOnSet(
-        arrow=arrow, sources={"A": nodeA, "B": nodeB}, targets={"C": nodeC}
-    )
-    graph_set = GraphSet()
-    graph_set.append(edge1)
-
-    assert graph_set.node_sets == {frozenset([nodeA, nodeB]), frozenset([nodeC])}
--- a/tests/raw_datas/salary.pdf
+++ b/tests/raw_datas/salary.pdf
--- a/tests/raw_datas/username-password-recovery-code.xls
+++ b/tests/raw_datas/username-password-recovery-code.xls
--- a/tests/raw_datas/username-password-recovery-code.xlsx
+++ b/tests/raw_datas/username-password-recovery-code.xlsx
--- a/tests/raw_datas/username.csv
+++ b/tests/raw_datas/username.csv
@@ -1,7 +0,0 @@
-Username;Identifier;First name;Last name
-booker12;9012;Rachel;Booker
-grey07;2070;Laura;Grey
-johnson81;4081;Craig;Johnson
-jenkins46;9346;Mary;Jenkins
-smith79;5079;Jamie;Smith
-
--- a/tests/repository/init.py
+++ b/tests/repository/init.py
--- a/tests/repository/fs_examples/salary.pdf
+++ b/tests/repository/fs_examples/salary.pdf
--- a/tests/repository/fs_examples/username-password-recovery-code.xls
+++ b/tests/repository/fs_examples/username-password-recovery-code.xls
--- a/tests/repository/fs_examples/username-password-recovery-code.xlsx
+++ b/tests/repository/fs_examples/username-password-recovery-code.xlsx
--- a/tests/repository/fs_examples/username.csv
+++ b/tests/repository/fs_examples/username.csv
--- a/tests/repository/test_fs_repository.py
+++ b/tests/repository/test_fs_repository.py
@@ -0,0 +1,84 @@
+import shutil
+from pathlib import Path
+
+import pytest
+from pandas import pandas
+
+from dashboard.libs.repository.fs_repository import FSRepository
+
+EXAMPLE_DIR = "./tests/repository/fs_examples/"
+
+
+@pytest.fixture
+def location(tmp_path):
+    loc = tmp_path
+    username_loc = loc / "username"
+    username_loc.mkdir()
+    salary_loc = loc / "salary"
+    salary_loc.mkdir()
+    example_src = Path(EXAMPLE_DIR)
+
+    for f in example_src.glob("*"):
+        if "username" in str(f):
+            shutil.copy(f, username_loc)
+        else:
+            shutil.copy(f, salary_loc)
+
+    return loc
+
+
+def test_init(location):
+    repo = FSRepository("example", location)
+    assert repo.ls() == [
+        "username",
+        "salary",
+    ]
+    assert repo.schemas() == [
+        ".",
+        "username",
+        "salary",
+    ]
+
+    assert repo.tables() == []
+    assert repo.tables("username") == [
+        "username.csv",
+        "username-password-recovery-code.xlsx",
+        "username-password-recovery-code.xls",
+    ]
+    assert repo.tables("salary") == ["salary.pdf"]
+
+
+def test_read_csv(location):
+    repo = FSRepository("example", location)
+    username = repo.read("username.csv", "username", delimiter=";")
+    assert list(username.columns) == [
+        "Username",
+        "Identifier",
+        "First name",
+        "Last name",
+    ]
+    assert len(username.index) == 5
+
+
+def test_fake_read_xlsx(location):
+    repo = FSRepository("example", location)
+    df = pandas.read_excel(
+        location / "username" / "username-password-recovery-code.xls"
+    )
+    print(df)
+
+
+def test_read_xlsx(location):
+    repo = FSRepository("example", location)
+    username = repo.read("username-password-recovery-code.xls", "username")
+    assert list(username.columns) == [
+        "Username",
+        "Identifier",
+        "One-time password",
+        "Recovery code",
+        "First name",
+        "Last name",
+        "Department",
+        "Location",
+    ]
+    assert len(username.index) == 5
--- a/uv.lock
+++ b/uv.lock
@@ -1,7 +0,0 @@
-version = 1
-requires-python = ">=3.13"
-
-[[package]]
-name = "plesna"
-version = "0.1.0"
-source = { virtual = "." }
Author	SHA1	Message	Date
Bertrand Benjamin	e794242a03	Feat: test on pandas xlsx and ods file reader	2024-10-07 05:27:46 +02:00
Bertrand Benjamin	5450de8628	Feat: start testing fs_repository	2024-08-14 10:44:38 +02:00
Bertrand Benjamin	08c7fbe4c5	Feat: test consume_flux	2024-08-14 07:41:36 +02:00
Bertrand Benjamin	959b53e6a0	Feat: start flux	2024-08-14 07:22:01 +02:00
Bertrand Benjamin	91e229eab2	Feat: add __init__ and mod function signature	2024-08-14 07:21:36 +02:00
Bertrand Benjamin	2de0e5ef5c	refact: rename stage to repository	2024-08-07 11:39:33 +02:00
Bertrand Benjamin	7fb7bc6f5c	Feat: put table's callback and layout in factory	2024-07-28 18:49:07 +02:00
Bertrand Benjamin	612df0a8eb	Feat: callback to toggle editing	2024-07-28 17:34:56 +02:00
Bertrand Benjamin	74882ae572	Feat: add navigation	2024-07-28 12:29:14 +02:00
Bertrand Benjamin	d8f2fb52e1	feat: organise router path	2024-07-27 19:19:59 +02:00
Bertrand Benjamin	f9bfb917bd	feat: global design	2024-07-27 18:45:20 +02:00
Bertrand Benjamin	cdad13788a	feat: add tailwindcss	2024-07-27 18:22:00 +02:00
Bertrand Benjamin	29c82ae597	feat: add recursive schema	2024-07-27 17:50:29 +02:00
Bertrand Benjamin	5b53630688	feat: add schema and table listing	2024-07-27 17:39:09 +02:00
Bertrand Benjamin	ed6d1c87d1	feat: init dashboard	2024-07-27 15:55:20 +02:00