From 67bea5b9b48f5b55f281ae742d58352646124c6e Mon Sep 17 00:00:00 2001 From: Bertrand Benjamin Date: Wed, 28 Sep 2022 20:49:58 +0200 Subject: [PATCH] Fix: smarter exclusion in charge --- pdf_oralia/extract_charge.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pdf_oralia/extract_charge.py b/pdf_oralia/extract_charge.py index 18bd267..d33a2f0 100644 --- a/pdf_oralia/extract_charge.py +++ b/pdf_oralia/extract_charge.py @@ -6,6 +6,8 @@ import pandas as pd def extract_charge(table): """From pdfplumber table extract the charge dataframe""" + for l in table: + print(l) df = ( pd.DataFrame(table[1:], columns=table[0]) .replace("", np.nan) @@ -13,7 +15,8 @@ def extract_charge(table): ) drop_index = df[ df["RECAPITULATIF DES OPERATIONS"].str.contains("TOTAUX", case=False) - | df["RECAPITULATIF DES OPERATIONS"].str.contains("solde", case=False) + | df["RECAPITULATIF DES OPERATIONS"].str.contains("Solde créditeur", case=False) + | df["RECAPITULATIF DES OPERATIONS"].str.contains("Solde débiteur", case=False) ].index df.drop(drop_index, inplace=True)