plesna/scripts/gold_mart.py

86 lines
2.0 KiB
Python
Raw Normal View History

2024-03-03 06:46:36 +00:00
import logging
from collections.abc import Callable
from pathlib import Path
import pandas as pd
from scripts.flux import (
CSVSource,
Destination,
Flux,
SplitDestination,
Transformation,
consume_fluxes,
2024-04-15 09:59:32 +00:00
to_excel,
)
2024-03-03 06:46:36 +00:00
# Module-level logger named after this module. Its own level is lowered to
# DEBUG so that records of every severity pass this logger's level check.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
def build_lots(dfs: list[pd.DataFrame]) -> pd.DataFrame:
    """Stack the given frames vertically into a single frame.

    Args:
        dfs: Frames to concatenate, in order.

    Returns:
        The row-wise concatenation of ``dfs``. Each input keeps its own
        index labels, so labels may repeat in the result.
    """
    return pd.concat(dfs)
2024-03-05 18:20:33 +00:00
# Fluxes consumed by the script entry point, keyed by flux name.
# "Lots" reads the CSV sources matching ``CRG/crg-*.csv``, combines them with
# ``build_lots`` and hands the result to a ``SplitDestination`` configured
# with ``split_column="Lot"`` and the ``to_excel`` writer
# (presumably one output per distinct "Lot" value -- confirm in scripts.flux).
FLUXES_LOT = {
    "Lots": Flux(
        sources=[CSVSource(filename="CRG/crg-*.csv")],
        transformation=Transformation(function=build_lots),
        destination=SplitDestination(
            name="Lot/lot",
            split_column="Lot",
            writer=to_excel,
        ),
    ),
}
2024-03-03 06:46:36 +00:00
def build_pnl(dfs: list[pd.DataFrame], year: int) -> pd.DataFrame:
    """Build the per-category, per-month summary table for one year.

    The input frames are concatenated, restricted to the rows whose
    ``Année`` equals ``year``, summed for every (``Catégorie``, ``Mois``)
    pair and then pivoted so that the months are spread over the columns.

    Args:
        dfs: Frames to combine. They must provide the ``Année``,
            ``Catégorie`` and ``Mois`` columns; every other column is summed.
        year: Value of ``Année`` to keep.

    Returns:
        A frame with one row per ``Catégorie`` (exposed as a regular column)
        and the summed values in columns labelled by month. Missing
        (category, month) combinations are filled with 0.
    """
    df = pd.concat(dfs)
    df = df[df["Année"] == year]
    pt = df.groupby(["Catégorie", "Mois"]).agg("sum").unstack().fillna(0)
    # After unstack() the column labels are (value column, month) pairs;
    # keep only the month part.
    # NOTE(review): if more than one value column is summed, month labels
    # repeat across columns -- confirm the expected input schema.
    pt.columns = [c[1] for c in pt.columns]
    # reset_index() returns a new frame rather than modifying ``pt`` in
    # place; the result used to be discarded, leaving "Catégorie" in the index.
    return pt.reset_index(["Catégorie"])
def build_pnl_flux(year: int) -> Flux:
    """Assemble the flux that produces the ``pnl/{year}`` output.

    Args:
        year: Year inserted into the source filenames and the destination
            name, and forwarded to ``build_pnl`` as its ``year`` keyword.

    Returns:
        A ``Flux`` reading ``CRG/crg-{year}.csv`` and
        ``banque/banque-{year}.csv``, transformed by ``build_pnl``.
    """
    yearly_sources = [
        CSVSource(filename=f"CRG/crg-{year}.csv"),
        CSVSource(filename=f"banque/banque-{year}.csv"),
    ]
    pnl_transformation = Transformation(
        function=build_pnl,
        extra_kwrds={"year": year},
    )
    pnl_destination = Destination(name=f"pnl/{year}")
    return Flux(
        sources=yearly_sources,
        transformation=pnl_transformation,
        destination=pnl_destination,
    )
if __name__ == "__main__":
    # Input ("gold") and output ("datamart") directories, rooted at datas/.
    data_path = Path("datas/")
    gold_path = data_path / "gold"
    gold_crg_path = gold_path / "CRG"
    mart_path = data_path / "datamart"

    # Validate the layout with explicit errors rather than ``assert``:
    # assertions are removed when Python runs with -O, which would silently
    # skip these checks.
    for required_dir in (data_path, gold_path, gold_crg_path, mart_path):
        if not required_dir.exists():
            raise FileNotFoundError(
                f"Required directory not found: {required_dir}"
            )

    files = consume_fluxes(
        fluxes=FLUXES_LOT, origin_path=gold_path, dest_path=mart_path
    )

    years = list(range(2017, 2024))
    # The yearly P&L fluxes are currently disabled: an empty mapping is
    # consumed instead of the one built by the line below.
    # pnl_fluxes = {f"pnl-{year}": build_pnl_flux(year) for year in years}
    pnl_fluxes = {}
    files = consume_fluxes(
        fluxes=pnl_fluxes,
        origin_path=gold_path,
        dest_path=mart_path,
    )
    # Only the result of the second consume_fluxes call is printed.
    print(files)