Feat: join locataire table with pagebreak
This commit is contained in:
parent
9a09ae0948
commit
c0c550bd59
@ -14,7 +14,7 @@ charge_table_settings = {
|
|||||||
|
|
||||||
def extract_from_pdf(pdf, charge_dest, location_dest):
|
def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||||
"""Build charge_dest and location_dest xlsx file from pdf"""
|
"""Build charge_dest and location_dest xlsx file from pdf"""
|
||||||
loc_table = []
|
loc_tables = []
|
||||||
for page in pdf.pages[1:]:
|
for page in pdf.pages[1:]:
|
||||||
page_text = page.extract_text()
|
page_text = page.extract_text()
|
||||||
situation_loc_line = [
|
situation_loc_line = [
|
||||||
@ -22,10 +22,10 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
|||||||
]
|
]
|
||||||
if situation_loc_line:
|
if situation_loc_line:
|
||||||
mois, annee = situation_loc_line[0].split(" ")[-2:]
|
mois, annee = situation_loc_line[0].split(" ")[-2:]
|
||||||
if loc_table:
|
if loc_tables:
|
||||||
loc_table += page.extract_table()[1:]
|
loc_tables.append(page.extract_table()[1:])
|
||||||
else:
|
else:
|
||||||
loc_table = page.extract_table()
|
loc_tables.append(page.extract_table())
|
||||||
|
|
||||||
elif "HONORAIRES" in page_text:
|
elif "HONORAIRES" in page_text:
|
||||||
table = page.extract_table(charge_table_settings)
|
table = page.extract_table(charge_table_settings)
|
||||||
@ -33,7 +33,7 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
|||||||
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False)
|
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False)
|
||||||
logging.info(f"{charge_dest} saved")
|
logging.info(f"{charge_dest} saved")
|
||||||
|
|
||||||
df_loc = extract_situation_loc(loc_table, mois=mois, annee=annee)
|
df_loc = extract_situation_loc(loc_tables, mois=mois, annee=annee)
|
||||||
df_loc = df_loc.assign()
|
df_loc = df_loc.assign()
|
||||||
df_loc.to_excel(location_dest, sheet_name="Location", index=False)
|
df_loc.to_excel(location_dest, sheet_name="Location", index=False)
|
||||||
logging.info(f"{location_dest} saved")
|
logging.info(f"{location_dest} saved")
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
@ -18,8 +20,39 @@ def parse_above_loc(content):
|
|||||||
return pd.Series(row)
|
return pd.Series(row)
|
||||||
|
|
||||||
|
|
||||||
def extract_situation_loc(table, mois, annee):
|
def join_row(last, next):
    """Merge two rows that are the two halves of one logical table row.

    Cells are combined position by position:

    * both cells non-empty: joined as ``"<last cell>\\n<next cell>"``;
    * only one cell non-empty: that cell is kept unchanged;
    * both cells empty (``None`` or ``""``): the result is ``""``.

    The result always has exactly as many cells as ``last``. A ``next`` row
    that is shorter than ``last`` is treated as if it were padded with empty
    cells (instead of raising ``IndexError``); cells of ``next`` beyond the
    width of ``last`` are ignored.

    Args:
        last: Cells of the upper half of the row.
        next: Cells of the lower half of the row. The name shadows the
            builtin ``next``; it is kept so existing callers keep working.

    Returns:
        A new list holding the merged cells; neither input is modified.
    """
    row = []
    for i, upper in enumerate(last):
        # A ragged lower row simply has no content for the missing columns.
        lower = next[i] if i < len(next) else None
        if upper and lower:
            row.append(f"{upper}\n{lower}")
        else:
            # Keep whichever side has content, falling back to "".
            row.append(upper or lower or "")
    return row
|
||||||
|
|
||||||
|
|
||||||
|
def join_tables(tables):
    """Concatenate per-page tables into one table, re-joining split rows.

    The tables are appended in order. Before appending a table, the last row
    accumulated so far is inspected: unless its first cell contains
    ``"Totaux"``, that row is assumed to continue on the next page, so it is
    merged with the first row of the incoming table via ``join_row`` and only
    the remaining rows are appended. When the first cell does contain
    ``"Totaux"``, the incoming table is appended as is.

    Args:
        tables: A list of tables, each table being a list of rows (lists of
            cells). Empty or ``None`` entries are skipped.

    Returns:
        A new list of rows. The input tables are never modified. An empty
        ``tables`` yields ``[]``.
    """
    joined = []

    for t in tables:
        if not t:
            # Nothing to contribute (no table / no rows for that page).
            continue

        if not joined:
            # Copy the rows so the caller's first table is never mutated.
            joined.extend(t)
            continue

        last_row = joined[-1]
        # An empty (None) first cell cannot be a totals row.
        first_cell = last_row[0] or ""
        if "Totaux" not in first_cell:
            # The previous row was cut by the page break: glue both halves.
            joined[-1] = join_row(last_row, t[0])
            joined.extend(t[1:])
        else:
            joined.extend(t)

    return joined
|
||||||
|
|
||||||
|
|
||||||
|
def extract_situation_loc(tables, mois, annee):
|
||||||
"""From pdfplumber table extract locataire df"""
|
"""From pdfplumber table extract locataire df"""
|
||||||
|
table = join_tables(tables)
|
||||||
try:
|
try:
|
||||||
df = pd.DataFrame(table[1:], columns=table[0])
|
df = pd.DataFrame(table[1:], columns=table[0])
|
||||||
except IndexError:
|
except IndexError:
|
||||||
|
Loading…
Reference in New Issue
Block a user