Feat: type df columns

This commit is contained in:
Bertrand Benjamin 2023-06-28 10:30:40 +02:00
parent 1a86b7bc26
commit 223f25130d
3 changed files with 39 additions and 8 deletions

View File

@ -3,7 +3,20 @@ import re
import numpy as np import numpy as np
import pandas as pd import pandas as pd
RECAPITULATIF_DES_OPERATION = 1 RECAPITULATIF_DES_OPERATIONS = 1
DF_TYPES = {
"Fournisseur": str,
"RECAPITULATIF DES OPERATIONS": str,
"Débits": float,
"Crédits": float,
"Dont T.V.A.": float,
"Locatif": float,
"Déductible": float,
"immeuble": str,
"mois": str,
"annee": str,
"lot": str,
}
def is_it(page_text): def is_it(page_text):
@ -27,14 +40,14 @@ def get_lot(txt):
def keep_row(row): def keep_row(row):
return not any( return not any(
[ [
word.lower() in row[RECAPITULATIF_DES_OPERATION].lower() word.lower() in row[RECAPITULATIF_DES_OPERATIONS].lower()
for word in ["TOTAL", "TOTAUX", "Solde créditeur", "Solde débiteur"] for word in ["TOTAL", "TOTAUX", "Solde créditeur", "Solde débiteur"]
] ]
) )
def extract(table, additionnal_fields: dict = {}): def extract(table, additionnal_fields: dict = {}):
"""Turn table to dictionary with additionnal fields""" """Turn table to dictionary with additional fields"""
extracted = [] extracted = []
header = table[0] header = table[0]
for row in table[1:]: for row in table[1:]:
@ -49,9 +62,9 @@ def extract(table, additionnal_fields: dict = {}):
for k, v in additionnal_fields.items(): for k, v in additionnal_fields.items():
r[k] = v r[k] = v
r["lot"] = get_lot(row[RECAPITULATIF_DES_OPERATION]) r["lot"] = get_lot(row[RECAPITULATIF_DES_OPERATIONS])
if "honoraire" in row[RECAPITULATIF_DES_OPERATION]: if "honoraire" in row[RECAPITULATIF_DES_OPERATIONS]:
r["Fournisseur"] = "IMI GERANCE" r["Fournisseur"] = "IMI GERANCE"
extracted.append(r) extracted.append(r)
@ -69,4 +82,5 @@ def table2df(tables):
) )
df["Fournisseur"] = df["Fournisseur"].fillna(method="ffill") df["Fournisseur"] = df["Fournisseur"].fillna(method="ffill")
dfs.append(df) dfs.append(df)
return pd.concat(dfs) df = pd.concat(dfs).astype(DF_TYPES, errors="ignore")
return df

View File

@ -1,5 +1,22 @@
import pandas as pd import pandas as pd
DF_TYPES = {
"Locataires": str,
"Période": str,
"Loyers": float,
"Taxes": float,
"Provisions": float,
"Divers": str,
"Total": float,
"Réglés": float,
"Impayés": float,
"immeuble": str,
"mois": str,
"annee": str,
"Lot": str,
"Type": str,
}
def is_it(page_text): def is_it(page_text):
if "SITUATION DES LOCATAIRES" in page_text: if "SITUATION DES LOCATAIRES" in page_text:
@ -131,4 +148,4 @@ def flat_tables(tables):
def table2df(tables): def table2df(tables):
tables = flat_tables(tables) tables = flat_tables(tables)
joined = join_row(tables) joined = join_row(tables)
return pd.DataFrame.from_records(joined) return pd.DataFrame.from_records(joined).astype(DF_TYPES, errors="ignore")

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "pdf-oralia" name = "pdf-oralia"
version = "VERSION_PLACEHOLDER" version = "1.dev"
description = "" description = ""
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"] authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
readme = "README.md" readme = "README.md"