def extract_remise_com(table):
    """Extract the "remise commerciale" rows from a first-page table.

    Args:
        table: Sequence of rows; ``table[0]`` holds the column labels and
            ``table[1:]`` the data rows. One of the labels must be
            ``"RECAPITULATIF DES OPERATIONS"``.

    Returns:
        A DataFrame holding only the rows whose
        ``"RECAPITULATIF DES OPERATIONS"`` cell contains
        ``"Remise commerciale gérance"`` (case-insensitive). The first
        column is relabelled ``"Fournisseur"``; the original row index is
        kept.

    Raises:
        KeyError: If ``"RECAPITULATIF DES OPERATIONS"`` is not a column.
    """
    # Empty cells become NaN so that they are treated as missing values.
    df = pd.DataFrame(table[1:], columns=table[0]).replace("", np.nan)

    # na=False: rows with a missing label never count as a match.
    is_remise = df["RECAPITULATIF DES OPERATIONS"].str.contains(
        "Remise commerciale gérance", case=False, na=False
    )
    matches = df[is_remise]

    # Rename the first column by position. Build a fresh label list rather
    # than writing into ``columns.values``: an Index is immutable, and
    # mutating its backing array leaves the cached label lookup stale, so
    # ``result["Fournisseur"]`` could fail afterwards.
    new_labels = ["Fournisseur", *matches.columns[1:]]
    return matches.set_axis(new_labels, axis=1)