{ "cells": [ { "cell_type": "markdown", "id": "e24ca74b", "metadata": {}, "source": [ "# Extraction des informations pour Oralia" ] }, { "cell_type": "code", "execution_count": 1, "id": "1ac85f0c", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import pdfplumber\n", "from pathlib import Path" ] }, { "cell_type": "code", "execution_count": null, "id": "6b246985", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "id": "b80265f1", "metadata": {}, "outputs": [], "source": [ "pdf_file = Path(\"./pdfs/2022 04 Servient.pdf\")\n", "pdf = pdfplumber.open(pdf_file)" ] }, { "cell_type": "code", "execution_count": 3, "id": "a62448d8", "metadata": {}, "outputs": [], "source": [ "xls_charge = f\"{pdf_file.stem.replace(' ', '_')}_charge.xlsx\"\n", "xls_locataire = f\"{pdf_file.stem.replace(' ', '_')}_locataire.xlsx\"" ] }, { "cell_type": "markdown", "id": "1f503cf5", "metadata": {}, "source": [ "## Page 1: Récapitulatif" ] }, { "cell_type": "code", "execution_count": 4, "id": "ae9eb950", "metadata": {}, "outputs": [], "source": [ "p1 = pdf.pages[0]" ] }, { "cell_type": "code", "execution_count": 5, "id": "32ef66d7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"SITUATION DES LOCATAIRES\" in p1.extract_text()" ] }, { "cell_type": "markdown", "id": "50dd9c09", "metadata": {}, "source": [ "## Situation des locataires" ] }, { "cell_type": "code", "execution_count": 6, "id": "e9c0aefd", "metadata": {}, "outputs": [], "source": [ "def extract_situation_loc(table):\n", " df = pd.DataFrame(table[1:], columns=table[0])\n", " rows = []\n", " for i, row in df[df[\"Locataires\"]==\"Totaux\"].iterrows():\n", " above_row_loc = df.iloc[i-1][\"Locataires\"]\n", " up_row = pd.concat([row, \n", " parse_above_loc(above_row_loc),\n", " ])\n", "\n", " rows.append(up_row)\n", " df_cleaned = pd.concat(rows, axis=1).T\n", " df_cleaned.drop([\"Locataires\", \"\", \"Période\"], axis=1, inplace=True)\n", " return df_cleaned\n", " " ] }, { "cell_type": "code", "execution_count": 7, "id": "87e05f50", "metadata": {}, "outputs": [], "source": [ "def parse_above_loc(content):\n", " row = {}\n", " try:\n", " app, loc = content.split(\"\\n\")\n", " except ValueError:\n", " row[\"lot\"] = \"\"\n", " row[\"type\"] = \"\"\n", " row[\"locataire\"] = content\n", " \n", " else:\n", " app_ = app.split(\" \")\n", " row[\"lot\"] = app_[1]\n", " row[\"type\"] = \" \".join(app_[2:])\n", " row[\"locataire\"] = loc\n", " return pd.Series(row)" ] }, { "cell_type": "code", "execution_count": null, "id": "19a1446d", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 8, "id": "8afb23c7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LoyersTaxesProvisionsDiversTotalRéglésImpayéslottypelocataire
0342.800.00663.000.001005.801005.800001Loc. CommercialEFFUSION
13473.790.00519.0096.794089.584089.580002Loc. CommercialRAS
2597.200.0031.000.00628.60628.600003Appartement T1KALAI Bernard
3596.590.0031.000.00627.59627.590004Appartement T2PEJAUDIER Adelaide
4468.850.0020.000.00981.03485.00496.030009Appartement T1MANNA Baptiste
5745.390.00191.000.00936.39936.390005Loc. CommercialATELIERS RENAISSANCE
6834.550.0081.000.00915.55915.550006Appartement T3GUELLIER MURIEL
7591.690.0050.000.00641.69641.69Lot 0007 Appartement T1\\nDOMINIKIEWICZ\\nMELANIE
8574.710.0028.000.00602.71602.710008Appartement T1BESSON Léa
91201.100.0087.000.001288.101288.100010Appartement T3FILIPPI Bérengère
10500.460.0028.000.00528.46528.460011Appartement T1LOINE Anaïs
\n", "
" ], "text/plain": [ " Loyers Taxes Provisions Divers Total Réglés Impayés lot \\\n", "0 342.80 0.00 663.00 0.00 1005.80 1005.80 0001 \n", "1 3473.79 0.00 519.00 96.79 4089.58 4089.58 0002 \n", "2 597.20 0.00 31.00 0.00 628.60 628.60 0003 \n", "3 596.59 0.00 31.00 0.00 627.59 627.59 0004 \n", "4 468.85 0.00 20.00 0.00 981.03 485.00 496.03 0009 \n", "5 745.39 0.00 191.00 0.00 936.39 936.39 0005 \n", "6 834.55 0.00 81.00 0.00 915.55 915.55 0006 \n", "7 591.69 0.00 50.00 0.00 641.69 641.69 \n", "8 574.71 0.00 28.00 0.00 602.71 602.71 0008 \n", "9 1201.10 0.00 87.00 0.00 1288.10 1288.10 0010 \n", "10 500.46 0.00 28.00 0.00 528.46 528.46 0011 \n", "\n", " type locataire \n", "0 Loc. Commercial EFFUSION \n", "1 Loc. Commercial RAS \n", "2 Appartement T1 KALAI Bernard \n", "3 Appartement T2 PEJAUDIER Adelaide \n", "4 Appartement T1 MANNA Baptiste \n", "5 Loc. Commercial ATELIERS RENAISSANCE \n", "6 Appartement T3 GUELLIER MURIEL \n", "7 Lot 0007 Appartement T1\\nDOMINIKIEWICZ\\nMELANIE \n", "8 Appartement T1 BESSON Léa \n", "9 Appartement T3 FILIPPI Bérengère \n", "10 Appartement T1 LOINE Anaïs " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p2 = pdf.pages[1]\n", "extract_situation_loc(p2.extract_table())" ] }, { "cell_type": "code", "execution_count": 9, "id": "0e0ddca7", "metadata": {}, "outputs": [], "source": [ "charge_table_settings = {\n", " \"vertical_strategy\": \"lines\",\n", " \"horizontal_strategy\": \"text\",\n", "}\n", "def extract_charge(table):\n", " df = pd.DataFrame(table[1:], columns=table[0]).replace(\"\", np.nan).dropna(subset=[\"Débits\"])\n", " drop_index = df[df[\"RECAPITULATIF DES OPERATIONS\"].str.contains(\"TOTAUX\", case=False)].index\n", " df.drop(drop_index, inplace=True)\n", " return df" ] }, { "cell_type": "code", "execution_count": 10, "id": "b915b220", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RECAPITULATIF DES OPERATIONSDébitsCréditsDont T.V.A.LocatifDéductible
4DIDIER NETTOYAGEPC - ENTRETIEN IMMEUBLE708.58NaN118.10708.58NaN
6TOTAL DIRECT ENERGIEPC TOTAL DIRECT ENERGIE65.70NaN7.0365.70NaN
7EDFPC EDF DU 17.04.202289.56NaN10.2289.56NaN
9PICARD SERVICEFacture du 11/04/202266.76NaN6.0766.76NaN
15V2C MAINTENANCE6 - remplacement circulateur chudière447.70NaN40.70NaN447.70
20IMI GERANCETAVARES NORTE Dylan93.00NaN15.50NaN93.00
23IMI GERANCETAVARES NORTE Dylan173.58NaN28.93NaN173.58
27IMI GERANCETAVARES NORTE Dylan798.72NaN133.12NaN798.72
29NaNHonoraires H.T.979.20NaNNaNNaN979.20
30NaNTVA/Honoraires ( 20.00 % )195.84NaN195.84NaN195.84
\n", "
" ], "text/plain": [ " RECAPITULATIF DES OPERATIONS Débits \\\n", "4 DIDIER NETTOYAGE PC - ENTRETIEN IMMEUBLE 708.58 \n", "6 TOTAL DIRECT ENERGIE PC TOTAL DIRECT ENERGIE 65.70 \n", "7 EDF PC EDF DU 17.04.2022 89.56 \n", "9 PICARD SERVICE Facture du 11/04/2022 66.76 \n", "15 V2C MAINTENANCE 6 - remplacement circulateur chudière 447.70 \n", "20 IMI GERANCE TAVARES NORTE Dylan 93.00 \n", "23 IMI GERANCE TAVARES NORTE Dylan 173.58 \n", "27 IMI GERANCE TAVARES NORTE Dylan 798.72 \n", "29 NaN Honoraires H.T. 979.20 \n", "30 NaN TVA/Honoraires ( 20.00 % ) 195.84 \n", "\n", " Crédits Dont T.V.A. Locatif Déductible \n", "4 NaN 118.10 708.58 NaN \n", "6 NaN 7.03 65.70 NaN \n", "7 NaN 10.22 89.56 NaN \n", "9 NaN 6.07 66.76 NaN \n", "15 NaN 40.70 NaN 447.70 \n", "20 NaN 15.50 NaN 93.00 \n", "23 NaN 28.93 NaN 173.58 \n", "27 NaN 133.12 NaN 798.72 \n", "29 NaN NaN NaN 979.20 \n", "30 NaN 195.84 NaN 195.84 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p4 = pdf.pages[3]\n", "extract_charge(p4.extract_table(charge_table_settings))" ] }, { "cell_type": "code", "execution_count": 11, "id": "c7b071fa", "metadata": {}, "outputs": [], "source": [ "# im = p4.to_image()\n", "# im.debug_tablefinder(charge_table_settings)" ] }, { "cell_type": "code", "execution_count": 12, "id": "ebe2881a", "metadata": { "scrolled": false }, "outputs": [ { "ename": "ModuleNotFoundError", "evalue": "No module named 'openpyxl'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn [12], line 15\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHONORAIRES\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m page\u001b[38;5;241m.\u001b[39mextract_text():\n\u001b[1;32m 13\u001b[0m df_charge \u001b[38;5;241m=\u001b[39m extract_charge(page\u001b[38;5;241m.\u001b[39mextract_table(charge_table_settings))\n\u001b[0;32m---> 15\u001b[0m df_charge\u001b[38;5;241m.\u001b[39mto_excel(xls_charge, sheet_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCharges\u001b[39m\u001b[38;5;124m\"\u001b[39m, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 18\u001b[0m df_loc \u001b[38;5;241m=\u001b[39m extract_situation_loc(loc_table)\n\u001b[1;32m 19\u001b[0m df_loc \u001b[38;5;241m=\u001b[39m df_loc\u001b[38;5;241m.\u001b[39massign(\n\u001b[1;32m 20\u001b[0m mois \u001b[38;5;241m=\u001b[39m mois,\n\u001b[1;32m 21\u001b[0m annee \u001b[38;5;241m=\u001b[39m annee\n\u001b[1;32m 22\u001b[0m )\n", "File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/util/_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.._deprecate_kwarg..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 210\u001b[0m kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/util/_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.._deprecate_kwarg..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 210\u001b[0m kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/core/generic.py:2373\u001b[0m, in \u001b[0;36mNDFrame.to_excel\u001b[0;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes, storage_options)\u001b[0m\n\u001b[1;32m 2360\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexcel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ExcelFormatter\n\u001b[1;32m 2362\u001b[0m formatter \u001b[38;5;241m=\u001b[39m ExcelFormatter(\n\u001b[1;32m 2363\u001b[0m df,\n\u001b[1;32m 2364\u001b[0m na_rep\u001b[38;5;241m=\u001b[39mna_rep,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2371\u001b[0m inf_rep\u001b[38;5;241m=\u001b[39minf_rep,\n\u001b[1;32m 2372\u001b[0m )\n\u001b[0;32m-> 2373\u001b[0m \u001b[43mformatter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2374\u001b[0m \u001b[43m \u001b[49m\u001b[43mexcel_writer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2375\u001b[0m \u001b[43m \u001b[49m\u001b[43msheet_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msheet_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2376\u001b[0m \u001b[43m \u001b[49m\u001b[43mstartrow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstartrow\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2377\u001b[0m \u001b[43m \u001b[49m\u001b[43mstartcol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstartcol\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2378\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreeze_panes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreeze_panes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2379\u001b[0m \u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2380\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2381\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/io/formats/excel.py:911\u001b[0m, in \u001b[0;36mExcelFormatter.write\u001b[0;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options)\u001b[0m\n\u001b[1;32m 907\u001b[0m need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 908\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 909\u001b[0m \u001b[38;5;66;03m# error: Cannot instantiate abstract class 'ExcelWriter' with abstract\u001b[39;00m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;66;03m# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'\u001b[39;00m\n\u001b[0;32m--> 911\u001b[0m writer \u001b[38;5;241m=\u001b[39m \u001b[43mExcelWriter\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[abstract]\u001b[39;49;00m\n\u001b[1;32m 912\u001b[0m \u001b[43m \u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\n\u001b[1;32m 913\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 914\u001b[0m need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 916\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/io/excel/_openpyxl.py:56\u001b[0m, in \u001b[0;36mOpenpyxlWriter.__init__\u001b[0;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 45\u001b[0m path: FilePath \u001b[38;5;241m|\u001b[39m WriteExcelBuffer \u001b[38;5;241m|\u001b[39m ExcelWriter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 54\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# Use the openpyxl module as the Excel writer.\u001b[39;00m\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopenpyxl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mworkbook\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Workbook\n\u001b[1;32m 58\u001b[0m engine_kwargs \u001b[38;5;241m=\u001b[39m combine_kwargs(engine_kwargs, kwargs)\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 61\u001b[0m path,\n\u001b[1;32m 62\u001b[0m mode\u001b[38;5;241m=\u001b[39mmode,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 65\u001b[0m engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[1;32m 66\u001b[0m )\n", "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openpyxl'" ] } ], "source": [ "frames = []\n", "loc_table = []\n", "for page in pdf.pages:\n", " situation_loc_line = [l for l in page.extract_text().split(\"\\n\") if \"SITUATION DES LOCATAIRES CRG\" in l]\n", " if situation_loc_line:\n", " mois, annee = situation_loc_line[0].split(\" \")[-2:]\n", " if loc_table:\n", " loc_table += page.extract_table()[1:]\n", " else:\n", " loc_table = page.extract_table()\n", "\n", " if \"HONORAIRES\" in page.extract_text():\n", " df_charge = extract_charge(page.extract_table(charge_table_settings))\n", "\n", " df_charge.to_excel(xls_charge, sheet_name=\"Charges\", index=False)\n", "\n", "\n", "df_loc = extract_situation_loc(loc_table)\n", "df_loc = df_loc.assign(\n", " mois = mois,\n", " annee = annee\n", ")\n", "df_loc.to_excel(xls_locataire, sheet_name=\"Location\", index=False)\n", "#df_loc" ] }, { "cell_type": "code", "execution_count": null, "id": "f2e22a94", "metadata": {}, "outputs": [], "source": [ "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "dad54ca3", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.10" } }, "nbformat": 4, "nbformat_minor": 5 }