From f9be31c090ea83f85c731a2196619210495370fc Mon Sep 17 00:00:00 2001
From: Bertrand Benjamin
Date: Wed, 5 Jul 2023 17:49:25 +0200
Subject: [PATCH] Fix #3: replace empty string with np.nan

---
 pdf_oralia/pages/charge.py    | 3 +--
 pdf_oralia/pages/locataire.py | 8 ++++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pdf_oralia/pages/charge.py b/pdf_oralia/pages/charge.py
index b7b9f9c..bd43a20 100644
--- a/pdf_oralia/pages/charge.py
+++ b/pdf_oralia/pages/charge.py
@@ -83,6 +83,5 @@ def table2df(tables):
     df = pd.concat(dfs)
     df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
-    print(df.columns)
     df["lot"] = df["RECAPITULATIF DES OPERATIONS"].apply(get_lot)
 
-    return df.astype(DF_TYPES, errors="ignore")
+    return df.astype(DF_TYPES)
diff --git a/pdf_oralia/pages/locataire.py b/pdf_oralia/pages/locataire.py
index cf1df48..9dcddca 100644
--- a/pdf_oralia/pages/locataire.py
+++ b/pdf_oralia/pages/locataire.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 
 DF_TYPES = {
@@ -33,7 +34,7 @@ def is_drop(row):
 
 
 def extract(table, additionnal_fields: dict = {}):
-    """Turn table to dictionary with additionnal fields"""
+    """Turn table to dictionary with additional fields"""
     extracted = []
     header = table[0]
     for row in table[1:]:
@@ -159,4 +160,7 @@ def table2df(tables):
     df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
     df["Type"] = df["Type"].apply(clean_type)
 
-    return df.astype(DF_TYPES, errors="ignore")
+    numeric_cols = [k for k, v in DF_TYPES.items() if v == float]
+    df[numeric_cols] = df[numeric_cols].replace("", np.nan)
+
+    return df.astype(DF_TYPES)