2022-09-28 13:23:21 +00:00
|
|
|
import logging
|
|
|
|
|
2022-09-27 14:07:06 +00:00
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
def parse_above_loc(content):
    """Split the cell sitting above a "Totaux" row into lot, type and tenant.

    ``content`` must contain at least two newline-separated lines. The first
    line is split on single spaces: its second token is read as the lot
    number and the remaining tokens form the unit type. The second line is
    taken verbatim as the tenant. Any further lines are ignored.

    Returns:
        A ``pd.Series`` with the keys ``lot`` (integer zero-padded to two
        digits, as a string), ``type`` and ``locataire``.

    Raises:
        ValueError: if ``content`` has fewer than two lines, or the lot
            token is not an integer.
        IndexError: if the first line has no second token.
    """
    header, tenant, *_unused = content.split("\n")
    tokens = header.split(" ")
    lot_number = int(tokens[1])
    return pd.Series(
        {
            "lot": f"{lot_number:02d}",
            "type": " ".join(tokens[2:]),
            "locataire": tenant,
        }
    )
|
|
|
|
|
|
|
|
|
2022-09-28 13:23:21 +00:00
|
|
|
def join_row(last, next):
    """Merge two rows cell by cell.

    For every position of ``last``:
    - both cells truthy: they are joined with a newline, ``last`` first;
    - only one cell truthy: that cell is kept as is;
    - neither truthy: an empty string is used.

    ``next`` must be at least as long as ``last``; extra cells in ``next``
    are dropped.

    Returns:
        A new list with one merged cell per cell of ``last``.
    """
    merged = []
    for position, upper in enumerate(last):
        lower = next[position]
        if upper and lower:
            merged.append(f"{upper}\n{lower}")
        else:
            # At most one of the two is truthy here; fall back to "".
            merged.append(upper or lower or "")
    return merged
|
|
|
|
|
|
|
|
|
|
|
|
def join_tables(tables):
    """Concatenate a sequence of tables (lists of rows) into one list of rows.

    Tables are appended in order. When the last row gathered so far does not
    contain "Totaux" in its first cell, it is merged with the first row of
    the following table through ``join_row`` before the rest of that table
    is appended. Otherwise the following table is appended unchanged.

    Args:
        tables: non-empty sequence of tables, each a list of rows (lists of
            cells).

    Returns:
        A new list of rows. The input tables are left untouched.

    Raises:
        IndexError: if ``tables`` is empty, or if a table that has to be
            inspected or merged has no rows.
    """
    # Work on a shallow copy: extending the result must not grow the
    # caller's first table as a side effect.
    joined = list(tables[0])
    for table in tables[1:]:
        last_row = joined[-1]
        # An empty first cell may be None rather than ""; treat it as empty,
        # the same way join_row treats falsy cells.
        if "Totaux" in (last_row[0] or ""):
            joined.extend(table)
        else:
            joined[-1] = join_row(last_row, table[0])
            joined.extend(table[1:])
    return joined
|
|
|
|
|
|
|
|
|
|
|
|
def extract_situation_loc(tables, mois, annee):
    """Build the per-tenant DataFrame from tables extracted with pdfplumber.

    The tables are first merged with ``join_tables``. The first row of the
    merged table is used as column names. Every row whose "Locataires" cell
    equals "Totaux" is kept and enriched with the fields that
    ``parse_above_loc`` reads from the "Locataires" cell of the row just
    before it.

    Args:
        tables: tables passed through to ``join_tables``.
        mois: value stored in the ``mois`` column of every output row.
        annee: value stored in the ``annee`` column of every output row.

    Returns:
        A DataFrame with one row per "Totaux" row, without the
        "Locataires", "" and "Période" columns, with the amount columns cast
        to float64 where the conversion succeeds, plus ``mois`` and
        ``annee``.

    Raises:
        IndexError: if the merged table cannot be turned into a DataFrame
            (for instance when it has no header row). The table is logged
            before the error propagates.
        ValueError: from ``pd.concat`` when no "Totaux" row is found.
    """
    table = join_tables(tables)
    try:
        df = pd.DataFrame(table[1:], columns=table[0])
    except IndexError:
        # Previously the table was printed and execution carried on with
        # ``df`` unbound; log it and let the real error propagate instead.
        logging.exception("Cannot build a DataFrame from table: %r", table)
        raise

    rows = []
    for i, row in df[df["Locataires"] == "Totaux"].iterrows():
        # ``df`` has the default integer index, so label ``i`` is also the
        # row position and ``i - 1`` is the row right above.
        # NOTE(review): for i == 0 this picks the last row of the frame.
        above_row_loc = df.iloc[i - 1]["Locataires"]
        rows.append(pd.concat([row, parse_above_loc(above_row_loc)]))

    # Each collected Series becomes one column; transpose to get one row each.
    df_cleaned = pd.concat(rows, axis=1).T
    df_cleaned = df_cleaned.drop(columns=["Locataires", "", "Période"])

    float_columns = (
        "Loyers",
        "Taxes",
        "Provisions",
        "Divers",
        "Total",
        "Réglés",
        "Impayés",
    )
    df_cleaned = df_cleaned.astype(
        dict.fromkeys(float_columns, "float64"),
        errors="ignore",
    )

    return df_cleaned.assign(mois=mois, annee=annee)
|