2024-03-02 17:18:06 +00:00
|
|
|
import logging
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
2024-03-03 05:39:27 +00:00
|
|
|
from scripts.flux import consume_fluxes
|
2024-03-02 17:18:06 +00:00
|
|
|
|
2024-03-03 06:05:10 +00:00
|
|
|
from .flux import Destination, ExcelSource, Flux
|
2024-03-02 17:18:06 +00:00
|
|
|
|
2024-03-03 05:39:27 +00:00
|
|
|
# Module-level logger, named after this module and set to emit DEBUG records.
logger = logging.getLogger(__name__)
logger.setLevel(level=logging.DEBUG)
|
2024-03-02 17:18:06 +00:00
|
|
|
|
|
|
|
|
2024-03-03 05:39:27 +00:00
|
|
|
def extract_cat(cat: pd.DataFrame) -> tuple[dict, list]:
    """Split a category table into a translation mapping and a drop list.

    Args:
        cat: Frame with an "Anciennes" column (old category names) and a
            "Nouvelles" column holding either the replacement name or the
            marker "NE PAS IMPORTER". Any other column is ignored.

    Returns:
        A ``(trans, cat_drop)`` tuple where ``trans`` maps each old name to
        its new name and ``cat_drop`` lists the old names flagged
        "NE PAS IMPORTER".
    """
    not_imported = cat["Nouvelles"] == "NE PAS IMPORTER"
    cat_drop = list(cat.loc[not_imported, "Anciennes"])

    # Pair the two columns by name rather than unpacking whole rows, so the
    # result does not depend on column order or on the sheet having exactly
    # two columns.
    kept = cat.loc[~not_imported]
    trans = dict(zip(kept["Anciennes"], kept["Nouvelles"]))
    return trans, cat_drop
|
|
|
|
|
|
|
|
|
2024-03-03 05:39:27 +00:00
|
|
|
def trans_2017_2021(
    dfs: list[pd.DataFrame], stagging_columns: list[str], **kwrds
) -> pd.DataFrame:
    """Reshape the 2017-2021 ledger sheet into the staging layout.

    Args:
        dfs: Exactly two frames: the ledger rows, then the category table
            understood by ``extract_cat``.
        stagging_columns: Columns to return, in that order. Any requested
            column that is not computed here must already exist in the
            ledger sheet.
        **kwrds: Accepted and ignored.

    Returns:
        The ledger rows whose category is not flagged for exclusion,
        restricted to ``stagging_columns``.
    """
    ledger, categories = dfs
    renames, excluded = extract_cat(categories)

    # Discard rows whose category is marked as not to be imported.
    is_excluded = ledger["Categorie"].isin(excluded)
    ledger = ledger[~is_excluded]

    building = ledger["immeuble"]
    door = ledger["porte"]
    # The year and month are read from fixed character positions of the
    # stringified date (0-3 and 5-6).
    date_text = ledger["Date"].astype(str)

    staged = ledger.assign(
        Immeuble=building,
        Porte=door,
        Débit=ledger["Débit"].fillna(0),
        Crédit=ledger["Crédit"].fillna(0),
        Lot=building.astype(str) + door.astype("str").str.zfill(2),
        Année=date_text.str[0:4],
        Mois=date_text.str[5:7],
        Catégorie=ledger["Categorie"].replace(renames),
        Fournisseur="",
    )
    return staged[stagging_columns]
|
2024-03-02 17:18:06 +00:00
|
|
|
|
|
|
|
|
2024-03-03 05:39:27 +00:00
|
|
|
def trans_2022_charge(
    dfs: list[pd.DataFrame], stagging_columns: list[str], **kwrds
) -> pd.DataFrame:
    """Reshape the 2022 charges sheet into the staging layout.

    Args:
        dfs: List whose first frame holds the charge rows, with columns
            "immeuble", "lot", "Débits", "Crédits", "annee", "mois" and
            "Catégorie Charge".
        stagging_columns: Columns to return, in that order.
        **kwrds: Accepted and ignored.

    Returns:
        A frame restricted to ``stagging_columns``.
    """
    df = dfs[0]
    df = df.assign(
        Immeuble=df["immeuble"],
        Porte=df["lot"],
        Débit=df["Débits"].fillna(0),
        Crédit=df["Crédits"].fillna(0),
        # Concatenate row by row: each lot gets the building of its own row,
        # whatever the frame's index labels are (and even if it is empty).
        Lot=df["immeuble"].astype(str) + df["lot"].astype("str").str.zfill(2),
        Année=df["annee"],
        Mois=df["mois"],
        # Categories are passed through as-is (no translation table here).
        Catégorie=df["Catégorie Charge"],
        Fournisseur="",
        Régie="Oralia - Gelas",
        Libellé="",
    )
    return df[stagging_columns]
|
2024-03-02 17:18:06 +00:00
|
|
|
|
|
|
|
|
2024-03-03 05:39:27 +00:00
|
|
|
def trans_2022_loc(
    dfs: list[pd.DataFrame], stagging_columns: list[str], **kwrds
) -> pd.DataFrame:
    """Reshape the 2022 tenant sheet into the staging layout.

    Args:
        dfs: List whose first frame holds the tenant rows, with columns
            "immeuble", "lot", "Réglés", "annee" and "mois".
        stagging_columns: Columns to return, in that order.
        **kwrds: Accepted and ignored.

    Returns:
        A frame restricted to ``stagging_columns``; every row is a credit
        (debit fixed to 0) under the category "Loyer Charge".
    """
    df = dfs[0]
    df = df.assign(
        Immeuble=df["immeuble"],
        Porte=df["lot"],
        Débit=0,
        Crédit=df["Réglés"].fillna(0),
        # Concatenate row by row: each lot gets the building of its own row,
        # whatever the frame's index labels are (and even if it is empty).
        Lot=df["immeuble"].astype(str) + df["lot"].astype("str").str.zfill(2),
        Année=df["annee"],
        Mois=df["mois"],
        Catégorie="Loyer Charge",
        Fournisseur="",
        Régie="Oralia - Gelas",
        Libellé="",
    )
    return df[stagging_columns]
|
2024-03-02 17:18:06 +00:00
|
|
|
|
|
|
|
|
2024-03-03 05:39:27 +00:00
|
|
|
def trans_2023(
    dfs: list[pd.DataFrame], year: str, stagging_columns: list[str], **kwrds
) -> pd.DataFrame:
    """Reshape the 2023 ledger sheet into the staging layout.

    Args:
        dfs: List whose first frame holds the ledger rows, with at least
            the columns "Immeuble", "Porte", "Débit" and "Crédit".
        year: Value written to the "Année" column of every row.
        stagging_columns: Columns to return, in that order. Any requested
            column that is not computed here must already exist in the
            sheet.
        **kwrds: Accepted and ignored.

    Returns:
        A frame restricted to ``stagging_columns``.
    """
    sheet = dfs[0]

    # Missing amounts count as zero.
    debit = sheet["Débit"].fillna(0)
    credit = sheet["Crédit"].fillna(0)
    # Lot identifier: building followed by the door padded to two characters.
    padded_door = sheet["Porte"].astype("str").str.zfill(2)
    lot = sheet["Immeuble"].astype(str) + padded_door

    staged = sheet.assign(Débit=debit, Crédit=credit, Lot=lot, Année=year)
    return staged[stagging_columns]
|
|
|
|
|
|
|
|
|
|
|
|
# Column layout handed to every transformation of FLUXES as
# ``stagging_columns``; each one returns its frame restricted to these
# columns, in this order.
STAGGING_COLUMNS = [
    # Where the entry belongs.
    "Régie",
    "Immeuble",
    "Porte",
    "Lot",
    # When it was recorded.
    "Année",
    "Mois",
    # What it is.
    "Catégorie",
    "Fournisseur",
    "Libellé",
    # Amounts.
    "Débit",
    "Crédit",
]
|
|
|
|
|
|
|
|
# One Flux per historical CRG workbook, keyed by the workbook's file name.
# Every flux uses the "crg" destination, is split on the "Année" column and
# receives STAGGING_COLUMNS as ``stagging_columns``.
# NOTE(review): how ``split_destination`` is applied is decided in the flux
# module — presumably one output per distinct year; confirm there.
FLUXES = {
    # 2017-2021: ledger sheet plus the category translation sheet.
    "2017 2021 - charge et locataire.xlsx": Flux(
        sources=[
            ExcelSource(
                filename="2017 2021 - charge et locataire.xlsx",
                sheet_name="DB CRG",
            ),
            ExcelSource(
                filename="2017 2021 - charge et locataire.xlsx",
                sheet_name="Catégories",
            ),
        ],
        transformation=trans_2017_2021,
        extra_kwrds={"stagging_columns": STAGGING_COLUMNS},
        destination=Destination(name="crg"),
        split_destination="Année",
    ),
    # 2022: charges workbook.
    "2022 - charge.xlsx": Flux(
        sources=[
            ExcelSource(
                filename="2022 - charge.xlsx",
                sheet_name="Sheet1",
            ),
        ],
        transformation=trans_2022_charge,
        extra_kwrds={"stagging_columns": STAGGING_COLUMNS},
        destination=Destination(name="crg"),
        split_destination="Année",
    ),
    # 2022: tenant workbook.
    "2022 - locataire.xlsx": Flux(
        sources=[
            ExcelSource(
                filename="2022 - locataire.xlsx",
                sheet_name="Sheet1",
            ),
        ],
        transformation=trans_2022_loc,
        extra_kwrds={"stagging_columns": STAGGING_COLUMNS},
        destination=Destination(name="crg"),
        split_destination="Année",
    ),
    # 2023: single sheet; the year is injected through ``extra_kwrds``.
    "2023 - charge et locataire.xlsx": Flux(
        sources=[
            ExcelSource(
                filename="2023 - charge et locataire.xlsx",
                sheet_name="DB CRG 2023 ...",
            ),
        ],
        transformation=trans_2023,
        extra_kwrds={"year": 2023, "stagging_columns": STAGGING_COLUMNS},
        destination=Destination(name="crg"),
        split_destination="Année",
    ),
}
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Expected directory layout, relative to the current working directory.
    data_path = Path("datas/")
    history_path = data_path / "Histoire"
    history_crg_path = history_path / "CRG"
    staging_path = data_path / "staging"
    staging_crg_path = staging_path / "CRG"

    # Checked explicitly rather than with `assert`: assertions are removed
    # when Python runs with -O, and a bare AssertionError would not say
    # which directory is missing.
    for required_dir in (
        data_path,
        history_path,
        history_crg_path,
        staging_path,
        staging_crg_path,
    ):
        if not required_dir.exists():
            raise FileNotFoundError(
                f"Required directory is missing: {required_dir}"
            )

    # Run every flux from the historical CRG workbooks into the staging
    # area, then show what consume_fluxes returned.
    crg_files = consume_fluxes(
        fluxes=FLUXES,
        origin_path=history_crg_path,
        dest_path=staging_crg_path,
    )
    print(crg_files)
|