Feat: join locataire table with pagebreak
This commit is contained in:
parent
9a09ae0948
commit
c0c550bd59
@ -14,7 +14,7 @@ charge_table_settings = {
|
||||
|
||||
def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||
"""Build charge_dest and location_dest xlsx file from pdf"""
|
||||
loc_table = []
|
||||
loc_tables = []
|
||||
for page in pdf.pages[1:]:
|
||||
page_text = page.extract_text()
|
||||
situation_loc_line = [
|
||||
@ -22,10 +22,10 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||
]
|
||||
if situation_loc_line:
|
||||
mois, annee = situation_loc_line[0].split(" ")[-2:]
|
||||
if loc_table:
|
||||
loc_table += page.extract_table()[1:]
|
||||
if loc_tables:
|
||||
loc_tables.append(page.extract_table()[1:])
|
||||
else:
|
||||
loc_table = page.extract_table()
|
||||
loc_tables.append(page.extract_table())
|
||||
|
||||
elif "HONORAIRES" in page_text:
|
||||
table = page.extract_table(charge_table_settings)
|
||||
@ -33,7 +33,7 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False)
|
||||
logging.info(f"{charge_dest} saved")
|
||||
|
||||
df_loc = extract_situation_loc(loc_table, mois=mois, annee=annee)
|
||||
df_loc = extract_situation_loc(loc_tables, mois=mois, annee=annee)
|
||||
df_loc = df_loc.assign()
|
||||
df_loc.to_excel(location_dest, sheet_name="Location", index=False)
|
||||
logging.info(f"{location_dest} saved")
|
||||
|
@ -1,3 +1,5 @@
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@ -18,8 +20,39 @@ def parse_above_loc(content):
|
||||
return pd.Series(row)
|
||||
|
||||
|
||||
def extract_situation_loc(table, mois, annee):
|
||||
def join_row(last, next):
    """Merge two partial table rows into a single row, cell by cell.

    Used to stitch together a row whose content was split in two parts
    (``last`` being the first part, ``next`` the continuation).

    For each column of ``last``:

    * both cells non-empty -> the two values joined with a newline,
    * only one cell non-empty -> that value, unchanged,
    * both cells empty (``None`` or ``""``) -> ``""``.

    The result always has as many cells as ``last``. If ``next`` has fewer
    cells, the missing ones are treated as empty instead of raising
    ``IndexError``; extra cells in ``next`` are ignored.

    Note: the parameter name ``next`` shadows the builtin inside this
    function; it is kept so existing callers are not affected.
    """
    continuation = next
    available = len(continuation)

    merged = []
    for position, first_part in enumerate(last):
        # A shorter continuation row simply contributes nothing here.
        second_part = continuation[position] if position < available else None

        if first_part and second_part:
            merged.append(f"{first_part}\n{second_part}")
        else:
            # Keep whichever side has content, or "" when neither does.
            merged.append(first_part or second_part or "")
    return merged
|
||||
|
||||
|
||||
def join_tables(tables):
    """Concatenate a sequence of tables (lists of rows) into one table.

    Tables are appended in order. Before appending a table, the last row
    accumulated so far is inspected:

    * if its first cell contains ``"Totaux"`` (presumably a totals row that
      closes the table -- confirm against the PDF layout), the next table
      is appended as-is;
    * otherwise the last row and the first row of the next table are merged
      with ``join_row`` and the remaining rows are appended after it.

    The input is never modified: the first table is copied before rows are
    added to it. An empty ``tables`` yields ``[]``, and empty (or ``None``)
    individual tables are skipped.

    Returns the joined table as a new list of rows.
    """
    if not tables:
        return []

    # Work on a copy so the caller's first table is not mutated in place.
    joined = list(tables[0] or [])

    for table in tables[1:]:
        if not table:
            # Nothing was extracted for this chunk; there is no row to merge.
            continue

        if not joined:
            joined.extend(table)
            continue

        last_row = joined[-1]
        # The first cell may be None (empty cell) or the row may be empty.
        first_cell = (last_row[0] if last_row else None) or ""

        if "Totaux" not in first_cell:
            # The last row continues at the top of the next table.
            joined[-1] = join_row(last_row, table[0])
            joined.extend(table[1:])
        else:
            joined.extend(table)

    return joined
|
||||
|
||||
|
||||
def extract_situation_loc(tables, mois, annee):
|
||||
"""From pdfplumber table extract locataire df"""
|
||||
table = join_tables(tables)
|
||||
try:
|
||||
df = pd.DataFrame(table[1:], columns=table[0])
|
||||
except IndexError:
|
||||
|
Loading…
Reference in New Issue
Block a user