I9_juillet23 #10
@ -45,7 +45,7 @@ def from_pdf(pdf):
|
|||||||
charge_tables = []
|
charge_tables = []
|
||||||
patrimoie_tables = []
|
patrimoie_tables = []
|
||||||
|
|
||||||
for page in pdf.pages:
|
for page_number, page in enumerate(pdf.pages):
|
||||||
page_text = page.extract_text()
|
page_text = page.extract_text()
|
||||||
date = extract_date(page_text)
|
date = extract_date(page_text)
|
||||||
additionnal_fields = {
|
additionnal_fields = {
|
||||||
@ -76,7 +76,7 @@ def from_pdf(pdf):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("Page non reconnu")
|
logging.warning(f"Page {page_number+1} non reconnu. Page ignorée.")
|
||||||
|
|
||||||
df_charge = charge.table2df(recapitulatif_tables + charge_tables)
|
df_charge = charge.table2df(recapitulatif_tables + charge_tables)
|
||||||
df_loc = locataire.table2df(loc_tables)
|
df_loc = locataire.table2df(loc_tables)
|
||||||
|
@ -32,7 +32,10 @@ def is_it(page_text):
|
|||||||
def get_lot(txt):
    """Return the lot number found in a "RECAPITULATIF DES OPERATIONS" text.

    The lot number is the run of digits that follows a ``B``, ``S`` or ``M``
    and is itself followed by optional whitespace and a hyphen
    (e.g. ``"B3 - ..."``). Only the first occurrence is used.

    Args:
        txt: Text to search. Values that ``re`` cannot scan (such as
            ``None``) are tolerated.

    Returns:
        The lot number as a string zero-padded to at least two digits,
        or ``"*"`` when no lot number is found or ``txt`` cannot be searched.
    """
    regex = r"[BSM](\d+)(?=\s*-)"
    try:
        # Only the first match is needed, so stop scanning once it is found.
        match = re.search(regex, txt)
    except TypeError:
        # Raised by ``re`` when ``txt`` is not a string (e.g. ``None``).
        return "*"
    if match is None:
        return "*"
    return f"{int(match.group(1)):02d}"
|
||||||
|
Loading…
Reference in New Issue
Block a user