From 020e9195b12af0e774bcae8e379a247d2eb7c3d3 Mon Sep 17 00:00:00 2001 From: Bertrand Benjamin Date: Tue, 27 Sep 2022 21:14:27 +0200 Subject: [PATCH] Feat: set type for columns --- pdf_oralia/extract.py | 4 ++-- pdf_oralia/extract_charge.py | 10 ++++++++++ pdf_oralia/extract_locataire.py | 17 ++++++++++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/pdf_oralia/extract.py b/pdf_oralia/extract.py index 0058a10..4435042 100644 --- a/pdf_oralia/extract.py +++ b/pdf_oralia/extract.py @@ -32,8 +32,8 @@ def extract_from_pdf(pdf, charge_dest, location_dest): df_charge.to_excel(charge_dest, sheet_name="Charges", index=False) print(f"{charge_dest} saved") - df_loc = extract_situation_loc(loc_table) - df_loc = df_loc.assign(mois=mois, annee=annee) + df_loc = extract_situation_loc(loc_table, mois=mois, annee=annee) + df_loc = df_loc.assign() df_loc.to_excel(location_dest, sheet_name="Location", index=False) print(f"{location_dest} saved") diff --git a/pdf_oralia/extract_charge.py b/pdf_oralia/extract_charge.py index 4705104..90a0302 100644 --- a/pdf_oralia/extract_charge.py +++ b/pdf_oralia/extract_charge.py @@ -8,10 +8,20 @@ def extract_charge(table): pd.DataFrame(table[1:], columns=table[0]) .replace("", np.nan) .dropna(subset=["Débits"]) + .astype( + { + "Débits": "float64", + "Crédits": "float64", + "Dont T.V.A.": "float64", + "Locatif": "float64", + "Déductible": "float64", + } + ) ) drop_index = df[ df["RECAPITULATIF DES OPERATIONS"].str.contains("TOTAUX", case=False) | df["RECAPITULATIF DES OPERATIONS"].str.contains("solde", case=False) ].index df.drop(drop_index, inplace=True) + return df diff --git a/pdf_oralia/extract_locataire.py b/pdf_oralia/extract_locataire.py index 8053d42..b92698f 100644 --- a/pdf_oralia/extract_locataire.py +++ b/pdf_oralia/extract_locataire.py @@ -18,7 +18,7 @@ def parse_above_loc(content): return pd.Series(row) -def extract_situation_loc(table): +def extract_situation_loc(table, mois, annee): """From pdfplumber table extract locataire df""" try: df = pd.DataFrame(table[1:], columns=table[0]) @@ -37,4 +37,19 @@ def extract_situation_loc(table): rows.append(up_row) df_cleaned = pd.concat(rows, axis=1).T df_cleaned.drop(["Locataires", "", "Période"], axis=1, inplace=True) + + df_cleaned = df_cleaned.astype( + { + "Loyers": "float64", + "Taxes": "float64", + "Provisions": "float64", + "Divers": "float64", + "Total": "float64", + "Réglés": "float64", + "Impayés": "float64", + }, + errors="ignore", + ) + + df_cleaned = df_cleaned.assign(mois=mois, annee=annee) return df_cleaned