diff --git a/pdf_oralia/extract.py b/pdf_oralia/extract.py index fe11878..830870f 100644 --- a/pdf_oralia/extract.py +++ b/pdf_oralia/extract.py @@ -15,6 +15,7 @@ charge_table_settings = { def extract_from_pdf(pdf, charge_dest, location_dest): """Build charge_dest and location_dest xlsx file from pdf""" loc_tables = [] + charge_table = [] for page in pdf.pages[1:]: page_text = page.extract_text() situation_loc_line = [ @@ -27,11 +28,15 @@ def extract_from_pdf(pdf, charge_dest, location_dest): else: loc_tables.append(page.extract_table()) - elif "HONORAIRES" in page_text: - table = page.extract_table(charge_table_settings) - df_charge = extract_charge(table) - df_charge.to_excel(charge_dest, sheet_name="Charges", index=False) - logging.info(f"{charge_dest} saved") + elif "RECAPITULATIF DES OPERATIONS" in page_text: + if charge_table: + charge_table += page.extract_table(charge_table_settings)[1:] + else: + charge_table = page.extract_table(charge_table_settings) + + df_charge = extract_charge(charge_table) + df_charge.to_excel(charge_dest, sheet_name="Charges", index=False) + logging.info(f"{charge_dest} saved") df_loc = extract_situation_loc(loc_tables, mois=mois, annee=annee) df_loc = df_loc.assign()