Feat: type df columns
This commit is contained in:
parent 1a86b7bc26
commit 223f25130d
@ -3,7 +3,20 @@ import re
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
RECAPITULATIF_DES_OPERATION = 1
|
||||
RECAPITULATIF_DES_OPERATIONS = 1
|
||||
DF_TYPES = {
|
||||
"Fournisseur": str,
|
||||
"RECAPITULATIF DES OPERATIONS": str,
|
||||
"Débits": float,
|
||||
"Crédits": float,
|
||||
"Dont T.V.A.": float,
|
||||
"Locatif": float,
|
||||
"Déductible": float,
|
||||
"immeuble": str,
|
||||
"mois": str,
|
||||
"annee": str,
|
||||
"lot": str,
|
||||
}
|
||||
|
||||
|
||||
def is_it(page_text):
|
||||
@ -27,14 +40,14 @@ def get_lot(txt):
|
||||
def keep_row(row):
|
||||
return not any(
|
||||
[
|
||||
word.lower() in row[RECAPITULATIF_DES_OPERATION].lower()
|
||||
word.lower() in row[RECAPITULATIF_DES_OPERATIONS].lower()
|
||||
for word in ["TOTAL", "TOTAUX", "Solde créditeur", "Solde débiteur"]
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def extract(table, additionnal_fields: dict = {}):
|
||||
"""Turn table to dictionary with additionnal fields"""
|
||||
"""Turn table to dictionary with additional fields"""
|
||||
extracted = []
|
||||
header = table[0]
|
||||
for row in table[1:]:
|
||||
@ -49,9 +62,9 @@ def extract(table, additionnal_fields: dict = {}):
|
||||
for k, v in additionnal_fields.items():
|
||||
r[k] = v
|
||||
|
||||
r["lot"] = get_lot(row[RECAPITULATIF_DES_OPERATION])
|
||||
r["lot"] = get_lot(row[RECAPITULATIF_DES_OPERATIONS])
|
||||
|
||||
if "honoraire" in row[RECAPITULATIF_DES_OPERATION]:
|
||||
if "honoraire" in row[RECAPITULATIF_DES_OPERATIONS]:
|
||||
r["Fournisseur"] = "IMI GERANCE"
|
||||
|
||||
extracted.append(r)
|
||||
@ -69,4 +82,5 @@ def table2df(tables):
|
||||
)
|
||||
df["Fournisseur"] = df["Fournisseur"].fillna(method="ffill")
|
||||
dfs.append(df)
|
||||
return pd.concat(dfs)
|
||||
df = pd.concat(dfs).astype(DF_TYPES, errors="ignore")
|
||||
return df
|
||||
|
@ -1,5 +1,22 @@
|
||||
import pandas as pd
|
||||
|
||||
DF_TYPES = {
|
||||
"Locataires": str,
|
||||
"Période": str,
|
||||
"Loyers": float,
|
||||
"Taxes": float,
|
||||
"Provisions": float,
|
||||
"Divers": str,
|
||||
"Total": float,
|
||||
"Réglés": float,
|
||||
"Impayés": float,
|
||||
"immeuble": str,
|
||||
"mois": str,
|
||||
"annee": str,
|
||||
"Lot": str,
|
||||
"Type": str,
|
||||
}
|
||||
|
||||
|
||||
def is_it(page_text):
|
||||
if "SITUATION DES LOCATAIRES" in page_text:
|
||||
@ -131,4 +148,4 @@ def flat_tables(tables):
|
||||
def table2df(tables):
|
||||
tables = flat_tables(tables)
|
||||
joined = join_row(tables)
|
||||
return pd.DataFrame.from_records(joined)
|
||||
return pd.DataFrame.from_records(joined).astype(DF_TYPES, errors="ignore")
|
||||
|
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "pdf-oralia"
|
||||
version = "VERSION_PLACEHOLDER"
|
||||
version = "1.dev"
|
||||
description = ""
|
||||
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
|
||||
readme = "README.md"
|
||||
|
Loading…
Reference in New Issue
Block a user