Fix: handle the charge table when it spans multiple pages

This commit is contained in:
Bertrand Benjamin 2022-09-28 20:43:31 +02:00
parent 9240f5ae4c
commit fc15bb8303
1 changed file with 10 additions and 5 deletions

View File

@@ -15,6 +15,7 @@ charge_table_settings = {
def extract_from_pdf(pdf, charge_dest, location_dest): def extract_from_pdf(pdf, charge_dest, location_dest):
"""Build charge_dest and location_dest xlsx file from pdf""" """Build charge_dest and location_dest xlsx file from pdf"""
loc_tables = [] loc_tables = []
charge_table = []
for page in pdf.pages[1:]: for page in pdf.pages[1:]:
page_text = page.extract_text() page_text = page.extract_text()
situation_loc_line = [ situation_loc_line = [
@@ -27,11 +28,15 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
else: else:
loc_tables.append(page.extract_table()) loc_tables.append(page.extract_table())
elif "HONORAIRES" in page_text: elif "RECAPITULATIF DES OPERATIONS" in page_text:
table = page.extract_table(charge_table_settings) if charge_table:
df_charge = extract_charge(table) charge_table += page.extract_table(charge_table_settings)[1:]
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False) else:
logging.info(f"{charge_dest} saved") charge_table = page.extract_table(charge_table_settings)
df_charge = extract_charge(charge_table)
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False)
logging.info(f"{charge_dest} saved")
df_loc = extract_situation_loc(loc_tables, mois=mois, annee=annee) df_loc = extract_situation_loc(loc_tables, mois=mois, annee=annee)
df_loc = df_loc.assign() df_loc = df_loc.assign()