import logging import numpy as np import pandas as pd def get_lot(x): """Return lot number from "RECAPITULATIF DES OPERATIONS" """ if x[:2].isdigit(): return x[:2] if x[:1].isdigit(): return "0" + x[:1] if x[:2] == "PC": return "PC" return "" def extract_charge(table): """From pdfplumber table extract the charge dataframe""" df = ( pd.DataFrame(table[1:], columns=table[0]) .replace("", np.nan) .dropna(subset=["Débits"]) ) drop_index = df[ df["RECAPITULATIF DES OPERATIONS"].str.contains("TOTAUX", case=False) | df["RECAPITULATIF DES OPERATIONS"].str.contains("Solde créditeur", case=False) | df["RECAPITULATIF DES OPERATIONS"].str.contains("Solde débiteur", case=False) ].index df.drop(drop_index, inplace=True) df[""].mask( df["RECAPITULATIF DES OPERATIONS"].str.contains("honoraires", case=False), "IMI GERANCE", inplace=True, ) df = df.assign(lot=df["RECAPITULATIF DES OPERATIONS"].map(get_lot)) df = df.astype( { "Débits": "float64", "Crédits": "float64", "Dont T.V.A.": "float64", "Locatif": "float64", "Déductible": "float64", } ) return df