Feat: determine the page type once per page before extracting its table lines

This commit is contained in:
Bertrand Benjamin 2025-02-26 05:58:38 +01:00
parent ce8cdc4c1e
commit 4ee78a7e7b

View File

@ -50,26 +50,33 @@ def pdf_extract_tables_lines(pdf):
"mois": date.strftime("%m"),
"annee": date.strftime("%Y"),
}
table_type = ""
if locataire.is_it(page_text):
table_type = "locataire"
elif charge.is_it(page_text):
table_type = "charge"
elif patrimoine.is_it(page_text):
table_type = "patrimoine"
else:
logging.warning(f"Page {page_number} non reconnu. Page ignorée.")
for line in page.extract_table(extract_table_settings):
if locataire.is_it(page_text):
if table_type == "locataire":
res = loc_sink.send(line)
if res:
res.update(additionnal_fields)
yield locataire.Line(**res)
elif charge.is_it(page_text):
elif table_type == "charge":
res = charge_sink.send(line)
if res:
res.update(additionnal_fields)
yield charge.Line(**res)
elif patrimoine.is_it(page_text):
elif table_type == "patrimoine":
res = patrimoine_sink.send(line)
if res:
res.update(additionnal_fields)
yield patrimoine.Line(**res)
else:
logging.warning(f"Page {page_number} non reconnu. Page ignorée.")
page_number += 1