35 lines
904 B
Python
35 lines
904 B
Python
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
|
||
|
|
||
|
def is_it(page_text) -> bool:
    """Tell whether ``page_text`` contains the management-report marker.

    Args:
        page_text: Object searched with the ``in`` operator for the literal
            "COMPTE RENDU DE GESTION" (presumably the text of a page --
            confirm against the caller).

    Returns:
        True when the marker is found, False otherwise. The match is
        case-sensitive.
    """
    # The membership test already yields a bool; no explicit branch needed.
    return "COMPTE RENDU DE GESTION" in page_text
|
||
|
|
||
|
|
||
|
def extract(table, additionnal_fields: "dict | None" = None):
    """Collect the "Remise commerciale gérance" rows of a table as dicts.

    Args:
        table: Sequence whose first item is the header row and whose
            remaining items are data rows (assumed to be lists of cell
            values -- confirm against the caller).
        additionnal_fields: Optional extra key/value pairs copied into every
            extracted record. They are applied after the row cells, so they
            override a header of the same name. ``None`` means no extras.

    Returns:
        A list with one dict per row that contains the value
        "Remise commerciale gérance", mapping each header cell to the row
        cell at the same position. Empty when the table is empty or no row
        matches.

    Raises:
        IndexError: If a matching row has more cells than the header.
    """
    # A None sentinel replaces the former mutable ``{}`` default, which was
    # a single dict object shared by every call.
    extra_fields = {} if additionnal_fields is None else additionnal_fields

    try:
        header = table[0]
    except IndexError:
        # Empty table: no header and no rows, so nothing to extract.
        return []

    extracted = []
    for row in table[1:]:
        if "Remise commerciale gérance" not in row:
            continue
        # Index the header explicitly (rather than zip) so that a row wider
        # than the header raises instead of silently dropping cells.
        record = {header[i]: value for i, value in enumerate(row)}
        record.update(extra_fields)
        extracted.append(record)

    return extracted
|
||
|
|
||
|
# df = pd.DataFrame(table[1:], columns=table[0]).replace("", np.nan)
|
||
|
# df = df[
|
||
|
# df["RECAPITULATIF DES OPERATIONS"].str.contains(
|
||
|
# "Remise commerciale gérance", case=False, na=False
|
||
|
# )
|
||
|
# ]
|
||
|
#
|
||
|
# df.columns.values[0] = "Fournisseur"
|
||
|
# return df
|