Feat: extract date from top of page
This commit is contained in:
parent
e64fb63129
commit
bde6a0dfc6
@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
@ -13,6 +14,19 @@ charge_table_settings = {
|
||||
}
|
||||
|
||||
|
||||
def extract_date(page_text):
    """Extract the date written after "Lyon le" in a page.

    The page is scanned line by line. The first line containing
    "Lyon le" is used, and its last whitespace-separated word is parsed
    as a ``dd/mm/YYYY`` date.

    :param page_text: text in the page
    :return: the extracted date as a ``datetime``, or ``None`` when no
        line of the page contains "Lyon le"
    :raises ValueError: if the last word of the matching line is not a
        ``dd/mm/YYYY`` date
    """
    for line in page_text.split("\n"):
        if "Lyon le" not in line:
            continue
        # Split on any run of whitespace so trailing spaces or a "\r"
        # left by CRLF line endings do not end up in (or replace) the
        # date token.
        words = line.split()
        return datetime.strptime(words[-1], "%d/%m/%Y")
    # No line mentions "Lyon le": report the absence explicitly.
    return None
|
||||
|
||||
|
||||
def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||
"""Build charge_dest and location_dest xlsx file from pdf"""
|
||||
loc_tables = []
|
||||
@ -27,8 +41,11 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||
situation_loc_line = [
|
||||
l for l in page_text.split("\n") if "SITUATION DES LOCATAIRES" in l
|
||||
]
|
||||
date = extract_date(page_text)
|
||||
mois = date.strftime("%m")
|
||||
annee = date.strftime("%Y")
|
||||
if situation_loc_line:
|
||||
mois, annee = situation_loc_line[0].split(" ")[-2:]
|
||||
# mois, annee = situation_loc_line[0].split(" ")[-2:]
|
||||
if loc_tables:
|
||||
loc_tables.append(page.extract_table()[1:])
|
||||
else:
|
||||
|
Loading…
Reference in New Issue
Block a user