Feat: isolate category replacements

This commit is contained in:
Bertrand Benjamin 2024-03-02 18:32:06 +01:00
parent f56edac92c
commit bd866dda36

View File

@@ -78,8 +78,7 @@ def crg(history_path: Path, staging_path: Path, metadatas: dict, writing_func=to
return wrote_files
def trans_2017_2021(dfs, **kwrds):
df, cat = dfs
def extract_cat(cat):
cat_drop = list(cat[cat["Nouvelles"] == "NE PAS IMPORTER"]["Anciennes"])
cat_trans = cat[cat["Nouvelles"] != "NE PAS IMPORTER"]
@@ -87,6 +86,13 @@ def trans_2017_2021(dfs, **kwrds):
for _, (old, new) in cat_trans.iterrows():
trans[old] = new
return trans, cat_drop
def trans_2017_2021(dfs, **kwrds):
df, cat = dfs
cat_trans, cat_drop = extract_cat(cat)
df = df[~df["Categorie"].isin(cat_drop)]
df = df.assign(
@@ -97,7 +103,7 @@ def trans_2017_2021(dfs, **kwrds):
Lot=df["immeuble"].astype(str) + df["porte"].astype("str").str.zfill(2),
Année=df["Date"].astype(str).str.slice(0, 4),
Mois=df["Date"].astype(str).str.slice(5, 7),
Catégorie=df["Categorie"].replace(trans),
Catégorie=df["Categorie"].replace(cat_trans),
Fournisseur="",
)