pdf_auralia/pdf_oralia/extract_charge.py

18 lines
503 B
Python

import numpy as np
import pandas as pd
def extract_charge(table):
    """Build the charge dataframe from a pdfplumber table.

    The first row of ``table`` provides the column names and every
    following row provides one line of data.  Cells holding an empty
    string are treated as missing values.

    Rows are discarded when:

    * they have no value in the "Débits" column, or
    * their "RECAPITULATIF DES OPERATIONS" cell contains "TOTAUX" or
      "solde" (matched case-insensitively).

    The surviving rows keep the index labels they had in the raw table
    (the index is not reset).
    """
    header = table[0]
    rows = table[1:]

    charges = pd.DataFrame(rows, columns=header)
    charges = charges.replace("", np.nan)
    charges = charges.dropna(subset=["Débits"])

    # Summary lines (totals / balance) are not individual charges.
    labels = charges["RECAPITULATIF DES OPERATIONS"]
    is_total = labels.str.contains("TOTAUX", case=False)
    is_balance = labels.str.contains("solde", case=False)
    summary_rows = charges[is_total | is_balance].index

    charges.drop(summary_rows, inplace=True)
    return charges