From 44d415091013ddcedf2bbb8c49220fcdde4e43d5 Mon Sep 17 00:00:00 2001
From: Bertrand Benjamin
Date: Wed, 28 Jun 2023 10:44:56 +0200
Subject: [PATCH] Feat: remove Appartement in type

---
Review notes (free text in this area is ignored when the patch is applied):

* This file was restored from a copy whose line breaks had been collapsed.
  Hunk line counts and the diffstat below were re-checked and are
  consistent, but the leading indentation of the hunk lines is a
  reconstruction -- TODO confirm against the repository before applying.
* The "From:" header carries no e-mail address in this copy; "git am" may
  reject it, while "git apply" does not need it.
* charge.py, table2df: the added "print(df.columns)" looks like leftover
  debug output -- consider dropping it in a follow-up commit.
* charge.py, table2df: "lot" is now derived per DataFrame from the
  "RECAPITULATIF DES OPERATIONS" column instead of per row in extract().
* Both table2df functions: "x[0].capitalize()" keeps only the first
  element of each "immeuble" value. Whether the values are strings or
  sequences is not visible from this patch -- verify that this is intended.
* locataire.py, clean_type: when "appartement" occurs in the value
  (case-insensitive) only the last two characters are kept; this presumes
  a two-character suffix -- confirm against real data.
* locataire.py, join_row: "else: pass" replaces a debug print and is now a
  no-op branch that could be removed entirely.

 pdf_oralia/pages/charge.py    | 10 ++++++----
 pdf_oralia/pages/locataire.py | 15 +++++++++++++--
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/pdf_oralia/pages/charge.py b/pdf_oralia/pages/charge.py
index 9048422..8a0a796 100644
--- a/pdf_oralia/pages/charge.py
+++ b/pdf_oralia/pages/charge.py
@@ -62,8 +62,6 @@ def extract(table, additionnal_fields: dict = {}):
         for k, v in additionnal_fields.items():
             r[k] = v
 
-        r["lot"] = get_lot(row[RECAPITULATIF_DES_OPERATIONS])
-
         if "honoraire" in row[RECAPITULATIF_DES_OPERATIONS]:
             r["Fournisseur"] = "IMI GERANCE"
 
@@ -82,5 +80,9 @@ def table2df(tables):
         )
         df["Fournisseur"] = df["Fournisseur"].fillna(method="ffill")
         dfs.append(df)
-    df = pd.concat(dfs).astype(DF_TYPES, errors="ignore")
-    return df
+    df = pd.concat(dfs)
+
+    df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
+    print(df.columns)
+    df["lot"] = df["RECAPITULATIF DES OPERATIONS"].apply(get_lot)
+    return df.astype(DF_TYPES, errors="ignore")
diff --git a/pdf_oralia/pages/locataire.py b/pdf_oralia/pages/locataire.py
index 4ec4141..cf1df48 100644
--- a/pdf_oralia/pages/locataire.py
+++ b/pdf_oralia/pages/locataire.py
@@ -84,6 +84,12 @@ def parse_lot(string):
     return {"Lot": "{:02d}".format(int(words[1])), "Type": " ".join(words[2:])}
 
 
+def clean_type(string):
+    if "appartement" in string.lower():
+        return string[-2:]
+    return string
+
+
 def join_row(table):
     joined = []
     for row in table:
@@ -133,7 +139,7 @@ def join_row(table):
             )
             joined.append(row)
         else:
-            print(row)
+            pass
 
     return joined
 
@@ -148,4 +154,9 @@ def flat_tables(tables):
 def table2df(tables):
     tables = flat_tables(tables)
     joined = join_row(tables)
-    return pd.DataFrame.from_records(joined).astype(DF_TYPES, errors="ignore")
+    df = pd.DataFrame.from_records(joined)
+
+    df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
+    df["Type"] = df["Type"].apply(clean_type)
+
+    return df.astype(DF_TYPES, errors="ignore")