diff --git a/pdf_oralia/extract.py b/pdf_oralia/extract.py index 3407f56..f7e2dba 100644 --- a/pdf_oralia/extract.py +++ b/pdf_oralia/extract.py @@ -45,7 +45,7 @@ def from_pdf(pdf): charge_tables = [] patrimoie_tables = [] - for page in pdf.pages: + for page_number, page in enumerate(pdf.pages): page_text = page.extract_text() date = extract_date(page_text) additionnal_fields = { @@ -76,7 +76,7 @@ def from_pdf(pdf): pass else: - raise ValueError("Page non reconnu") + logging.warning(f"Page {page_number+1} non reconnu. Page ignorée.") df_charge = charge.table2df(recapitulatif_tables + charge_tables) df_loc = locataire.table2df(loc_tables)