Feat: determine the page type before extracting its table lines
This commit is contained in:
parent
ce8cdc4c1e
commit
4ee78a7e7b
@@ -50,26 +50,33 @@ def pdf_extract_tables_lines(pdf):
|
|||||||
"mois": date.strftime("%m"),
|
"mois": date.strftime("%m"),
|
||||||
"annee": date.strftime("%Y"),
|
"annee": date.strftime("%Y"),
|
||||||
}
|
}
|
||||||
|
table_type = ""
|
||||||
|
if locataire.is_it(page_text):
|
||||||
|
table_type = "locataire"
|
||||||
|
elif charge.is_it(page_text):
|
||||||
|
table_type = "charge"
|
||||||
|
elif patrimoine.is_it(page_text):
|
||||||
|
table_type = "patrimoine"
|
||||||
|
else:
|
||||||
|
logging.warning(f"Page {page_number} non reconnu. Page ignorée.")
|
||||||
|
|
||||||
for line in page.extract_table(extract_table_settings):
|
for line in page.extract_table(extract_table_settings):
|
||||||
if locataire.is_it(page_text):
|
if table_type == "locataire":
|
||||||
res = loc_sink.send(line)
|
res = loc_sink.send(line)
|
||||||
if res:
|
if res:
|
||||||
res.update(additionnal_fields)
|
res.update(additionnal_fields)
|
||||||
yield locataire.Line(**res)
|
yield locataire.Line(**res)
|
||||||
elif charge.is_it(page_text):
|
elif table_type == "charge":
|
||||||
res = charge_sink.send(line)
|
res = charge_sink.send(line)
|
||||||
if res:
|
if res:
|
||||||
res.update(additionnal_fields)
|
res.update(additionnal_fields)
|
||||||
yield charge.Line(**res)
|
yield charge.Line(**res)
|
||||||
|
|
||||||
elif patrimoine.is_it(page_text):
|
elif table_type == "patrimoine":
|
||||||
res = patrimoine_sink.send(line)
|
res = patrimoine_sink.send(line)
|
||||||
if res:
|
if res:
|
||||||
res.update(additionnal_fields)
|
res.update(additionnal_fields)
|
||||||
yield patrimoine.Line(**res)
|
yield patrimoine.Line(**res)
|
||||||
else:
|
|
||||||
logging.warning(f"Page {page_number} non reconnu. Page ignorée.")
|
|
||||||
|
|
||||||
page_number += 1
|
page_number += 1
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user