pdf_auralia/pdf_oralia/extract_charge.py

37 lines
898 B
Python

import logging
import numpy as np
import pandas as pd
def extract_charge(table):
    """Build the charge dataframe from a pdfplumber table.

    Args:
        table: Sequence of rows. The first row holds the column names, the
            remaining rows hold the cell values. The columns
            "RECAPITULATIF DES OPERATIONS", "" (empty name), "Débits",
            "Crédits", "Dont T.V.A.", "Locatif" and "Déductible" must be
            present.

    Returns:
        A DataFrame restricted to rows that have a "Débits" value, without
        the rows whose label contains "TOTAUX" or "solde" (case-insensitive),
        with the five amount columns cast to float64. Rows whose label
        contains "honoraires" get "IMI GERANCE" in the unnamed column.

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If an amount cell cannot be converted to float.
    """
    label_col = "RECAPITULATIF DES OPERATIONS"

    df = (
        pd.DataFrame(table[1:], columns=table[0])
        .replace("", np.nan)
        .dropna(subset=["Débits"])
    )

    # na=False: empty labels were turned into NaN above; treat them as
    # "no match" so the boolean masks never contain missing values.
    labels = df[label_col].str
    is_summary = labels.contains("TOTAUX", case=False, na=False) | labels.contains(
        "solde", case=False, na=False
    )
    # Filter with a boolean mask and take an explicit copy so the assignment
    # below acts on an independent frame.
    df = df.loc[~is_summary].copy()

    is_fee = df[label_col].str.contains("honoraires", case=False, na=False)
    # Assign the result back instead of `df[""].mask(..., inplace=True)`:
    # the chained in-place form updates a temporary Series and does not
    # reach `df` when pandas Copy-on-Write is active.
    df[""] = df[""].mask(is_fee, "IMI GERANCE")

    return df.astype(
        {
            "Débits": "float64",
            "Crédits": "float64",
            "Dont T.V.A.": "float64",
            "Locatif": "float64",
            "Déductible": "float64",
        }
    )