pdf_auralia/pdf_oralia/extract_charge.py

28 lines
758 B
Python
Raw Normal View History

2022-09-27 14:07:06 +00:00
import numpy as np
import pandas as pd
def extract_charge(table):
    """Build the charge dataframe from a pdfplumber table.

    The first row of ``table`` is used as the column header and the
    remaining rows as data. Empty strings are treated as missing values,
    rows without a "Débits" value are discarded, and the amount columns are
    cast to ``float64``. Rows whose "RECAPITULATIF DES OPERATIONS" label
    contains "TOTAUX" or "solde" (case-insensitive) are then removed.

    Args:
        table: Sequence of rows (each a sequence of cell values) whose first
            row holds the column names. It must provide the columns
            "RECAPITULATIF DES OPERATIONS", "Débits", "Crédits",
            "Dont T.V.A.", "Locatif" and "Déductible".

    Returns:
        pandas.DataFrame with the remaining rows. The index keeps the
        labels of the original data rows (it is not reset).

    Raises:
        KeyError: If one of the required columns is missing.
        ValueError: If an amount cell cannot be converted to a float.
    """
    label_column = "RECAPITULATIF DES OPERATIONS"
    amount_columns = [
        "Débits",
        "Crédits",
        "Dont T.V.A.",
        "Locatif",
        "Déductible",
    ]

    df = (
        pd.DataFrame(table[1:], columns=table[0])
        .replace("", np.nan)
        .dropna(subset=["Débits"])
        .astype({column: "float64" for column in amount_columns})
    )

    # Rows with a missing label yield NaN from ``str.contains``; ``na=False``
    # makes the mask strictly boolean so such rows are explicitly kept.
    labels = df[label_column]
    is_summary_row = labels.str.contains(
        "TOTAUX", case=False, na=False
    ) | labels.str.contains("solde", case=False, na=False)

    return df.drop(index=df.index[is_summary_row])