2022-09-28 07:56:35 +00:00
|
|
|
import logging
|
|
|
|
|
2022-09-27 14:07:06 +00:00
|
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
|
2022-09-28 19:03:53 +00:00
|
|
|
def get_lot(x):
|
|
|
|
"""Return lot number from "RECAPITULATIF DES OPERATIONS" """
|
|
|
|
if x[:2].isdigit():
|
|
|
|
return x[:2]
|
|
|
|
if x[:1].isdigit():
|
|
|
|
return "0" + x[:1]
|
|
|
|
if x[:2] == "PC":
|
|
|
|
return "PC"
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
2022-09-27 14:07:06 +00:00
|
|
|
def extract_charge(table):
    """Build the charge dataframe from a pdfplumber table.

    Args:
        table: Sequence of rows; ``table[0]`` is used as the header and the
            remaining rows as data. The header must contain the columns
            "RECAPITULATIF DES OPERATIONS", "Débits", "Crédits",
            "Dont T.V.A.", "Locatif", "Déductible" and a column named ``""``.

    Returns:
        A ``pandas.DataFrame`` where:
        - empty strings have been replaced by NaN;
        - rows with neither "Débits" nor "Crédits" are dropped;
        - summary rows (totals, balances, tenant payment totals) are dropped;
        - the ``""`` column is set to "IMI GERANCE" on rows whose label
          contains "honoraires";
        - a ``lot`` column is added from the label via ``get_lot``;
        - the amount columns are cast to ``float64``.

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If an amount cell cannot be converted to ``float64``.
    """
    label_col = "RECAPITULATIF DES OPERATIONS"

    df = (
        pd.DataFrame(table[1:], columns=table[0])
        .replace("", np.nan)
        .dropna(subset=["Débits", "Crédits"], how="all")
    )

    def label_contains(frame, text):
        # na=False: a missing label simply does not match. Without it the
        # mask would contain NaN and boolean indexing would raise.
        return frame[label_col].str.contains(text, case=False, na=False)

    # Summary lines are not individual charges.
    is_summary = (
        label_contains(df, "TOTAUX")
        | label_contains(df, "Solde créditeur")
        | label_contains(df, "Solde débiteur")
        | label_contains(df, "Total des reglements locataires")
    )
    # Explicit copy so the column assignments below act on an independent
    # frame rather than on a slice of the intermediate one.
    df = df.loc[~is_summary].copy()

    # Assign the masked column back instead of calling
    # ``df[""].mask(..., inplace=True)``: an in-place update on a column
    # selection is not guaranteed to reach ``df`` (and does not under pandas
    # Copy-on-Write).
    df[""] = df[""].mask(label_contains(df, "honoraires"), "IMI GERANCE")

    # Missing labels are mapped as "" so the lot lookup never sees NaN.
    df = df.assign(lot=df[label_col].fillna("").map(get_lot))

    return df.astype(
        {
            "Débits": "float64",
            "Crédits": "float64",
            "Dont T.V.A.": "float64",
            "Locatif": "float64",
            "Déductible": "float64",
        }
    )
|
2022-10-10 19:53:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
def extract_remise_com(table):
    """Return the "remise commerciale gérance" rows of a first-page table.

    Args:
        table: Sequence of rows; ``table[0]`` is used as the header and the
            remaining rows as data. The header must contain the column
            "RECAPITULATIF DES OPERATIONS".

    Returns:
        A ``pandas.DataFrame`` holding only the rows whose label contains
        "Remise commerciale gérance" (case-insensitive). Empty strings are
        replaced by NaN, and rows with a missing label never match.
    """
    header = table[0]
    rows = table[1:]
    frame = pd.DataFrame(rows, columns=header).replace("", np.nan)

    labels = frame["RECAPITULATIF DES OPERATIONS"]
    is_remise = labels.str.contains(
        "Remise commerciale gérance", case=False, na=False
    )
    return frame[is_remise]
|