import pandas as pd def parse_above_loc(content): row = {} try: app, loc = content.split("\n") except ValueError: row["lot"] = "" row["type"] = "" row["locataire"] = content else: app_ = app.split(" ") row["lot"] = app_[1] row["type"] = " ".join(app_[2:]) row["locataire"] = loc return pd.Series(row) def extract_situation_loc(table): """From pdfplumber table extract locataire df""" try: df = pd.DataFrame(table[1:], columns=table[0]) except IndexError: print(table) rows = [] for i, row in df[df["Locataires"] == "Totaux"].iterrows(): above_row_loc = df.iloc[i - 1]["Locataires"] up_row = pd.concat( [ row, parse_above_loc(above_row_loc), ] ) rows.append(up_row) df_cleaned = pd.concat(rows, axis=1).T df_cleaned.drop(["Locataires", "", "PĂ©riode"], axis=1, inplace=True) return df_cleaned