From b0333cddd8481d8f3742935003fd884dc78e3443 Mon Sep 17 00:00:00 2001 From: Bertrand Benjamin Date: Wed, 20 Sep 2023 09:22:50 +0200 Subject: [PATCH] fix: raise a warning when a page is not recognized --- pdf_oralia/extract.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pdf_oralia/extract.py b/pdf_oralia/extract.py index 3407f56..f7e2dba 100644 --- a/pdf_oralia/extract.py +++ b/pdf_oralia/extract.py @@ -45,7 +45,7 @@ def from_pdf(pdf): charge_tables = [] patrimoie_tables = [] - for page in pdf.pages: + for page_number, page in enumerate(pdf.pages): page_text = page.extract_text() date = extract_date(page_text) additionnal_fields = { @@ -76,7 +76,7 @@ def from_pdf(pdf): pass else: - raise ValueError("Page non reconnu") + logging.warning(f"Page {page_number+1} non reconnu. Page ignorée.") df_charge = charge.table2df(recapitulatif_tables + charge_tables) df_loc = locataire.table2df(loc_tables)