41 lines
1018 B
Python
41 lines
1018 B
Python
import pandas as pd
|
|
|
|
|
|
def parse_above_loc(content):
    """Split a "Locataires" cell into lot, type and tenant name.

    The cell is expected to hold exactly two lines: a unit description made
    of space-separated words, then the tenant name. The first word of the
    unit description is skipped, the second becomes the lot and the
    remaining words become the type.

    Args:
        content: Text of the cell. ``None`` is treated as an empty string.

    Returns:
        A ``pandas.Series`` with the keys ``"lot"``, ``"type"`` and
        ``"locataire"`` (in that order). When the cell does not contain
        exactly two lines, ``"lot"`` and ``"type"`` are empty and
        ``"locataire"`` holds the whole cell text.
    """
    if content is None:
        content = ""

    try:
        unit, tenant = content.split("\n")
    except ValueError:
        # Not exactly two lines: keep the raw text as the tenant name.
        return pd.Series({"lot": "", "type": "", "locataire": content})

    words = unit.split(" ")
    return pd.Series(
        {
            # A one-word unit line has no lot; fall back to "" instead of
            # raising IndexError.
            "lot": words[1] if len(words) > 1 else "",
            "type": " ".join(words[2:]),
            "locataire": tenant,
        }
    )
|
|
|
|
|
|
def extract_situation_loc(table):
    """Build the tenant ("locataire") DataFrame from a pdfplumber table.

    The first row of ``table`` supplies the column names and the remaining
    rows the data. Every row whose "Locataires" cell equals "Totaux" is kept
    and extended with the ``lot``, ``type`` and ``locataire`` fields parsed
    from the "Locataires" cell of the row directly above it. The
    "Locataires", "" and "Période" columns are then dropped.

    Args:
        table: Sequence of rows (header row first).

    Returns:
        A ``pandas.DataFrame`` with one row per "Totaux" line.

    Raises:
        ValueError: If ``table`` is empty or ``None``, or (raised by
            ``pandas.concat``) if it contains no "Totaux" row.
        KeyError: If one of the "Locataires", "" or "Période" columns is
            missing.
    """
    if table is None or len(table) == 0:
        # Fail with a clear message instead of continuing with ``df`` unbound.
        raise ValueError(f"Cannot extract locataires from an empty table: {table!r}")

    df = pd.DataFrame(table[1:], columns=table[0])
    tenants = df["Locataires"]

    rows = []
    for i, row in df[tenants == "Totaux"].iterrows():
        # ``df`` has a default RangeIndex, so the label ``i`` is also the
        # position. A "Totaux" line on the first data row has nothing above
        # it; without this guard ``iloc[-1]`` would wrap to the last row.
        above_row_loc = tenants.iloc[i - 1] if i > 0 else ""
        rows.append(pd.concat([row, parse_above_loc(above_row_loc)]))

    df_cleaned = pd.concat(rows, axis=1).T
    df_cleaned.drop(["Locataires", "", "Période"], axis=1, inplace=True)
    return df_cleaned
|