Compare commits
2 Commits
406b89fea1
...
0040dccd9a
Author | SHA1 | Date | |
---|---|---|---|
0040dccd9a | |||
b0333cddd8 |
@ -45,7 +45,7 @@ def from_pdf(pdf):
|
||||
charge_tables = []
|
||||
patrimoie_tables = []
|
||||
|
||||
for page in pdf.pages:
|
||||
for page_number, page in enumerate(pdf.pages):
|
||||
page_text = page.extract_text()
|
||||
date = extract_date(page_text)
|
||||
additionnal_fields = {
|
||||
@ -76,7 +76,7 @@ def from_pdf(pdf):
|
||||
pass
|
||||
|
||||
else:
|
||||
raise ValueError("Page non reconnu")
|
||||
logging.warning(f"Page {page_number+1} non reconnu. Page ignorée.")
|
||||
|
||||
df_charge = charge.table2df(recapitulatif_tables + charge_tables)
|
||||
df_loc = locataire.table2df(loc_tables)
|
||||
|
@ -32,7 +32,10 @@ def is_it(page_text):
|
||||
def get_lot(txt):
    """Return lot number from "RECAPITULATIF DES OPERATIONS".

    The lot number is the run of digits that directly follows one of the
    letters ``B``, ``S`` or ``M`` and is itself followed (after optional
    whitespace) by a dash, e.g. ``"B12 - ..."``.

    Args:
        txt: Text to search. Anything the ``re`` module cannot search with
            a ``str`` pattern (e.g. ``None``) is tolerated.

    Returns:
        The first lot number found, zero-padded to at least two digits
        (``"3"`` -> ``"03"``), or ``"*"`` when no lot number is found or
        when ``txt`` cannot be searched.
    """
    regex = r"[BSM](\d+)(?=\s*-)"
    try:
        # Only the first occurrence matters, so a single search is enough.
        match = re.search(regex, txt)
    except TypeError:
        # txt is not searchable text (e.g. None): no lot can be determined.
        return "*"
    if match is None:
        return "*"
    return "{:02d}".format(int(match.group(1)))
|
||||
|
Loading…
Reference in New Issue
Block a user