680 lines
28 KiB
Plaintext
680 lines
28 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e24ca74b",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Extraction des informations pour Oralia"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "1ac85f0c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import pdfplumber\n",
|
|
"from pathlib import Path"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6b246985",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "b80265f1",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"pdf_file = Path(\"./pdfs/2022 04 Servient.pdf\")\n",
|
|
"pdf = pdfplumber.open(pdf_file)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "a62448d8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"xls_charge = f\"{pdf_file.stem.replace(' ', '_')}_charge.xlsx\"\n",
|
|
"xls_locataire = f\"{pdf_file.stem.replace(' ', '_')}_locataire.xlsx\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "1f503cf5",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Page 1: Récapitulatif"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "ae9eb950",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"p1 = pdf.pages[0]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "32ef66d7",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"False"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"\"SITUATION DES LOCATAIRES\" in p1.extract_text()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "50dd9c09",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Situation des locataires"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "e9c0aefd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def extract_situation_loc(table):\n",
|
|
" df = pd.DataFrame(table[1:], columns=table[0])\n",
|
|
" rows = []\n",
|
|
" for i, row in df[df[\"Locataires\"]==\"Totaux\"].iterrows():\n",
|
|
" above_row_loc = df.iloc[i-1][\"Locataires\"]\n",
|
|
" up_row = pd.concat([row, \n",
|
|
" parse_above_loc(above_row_loc),\n",
|
|
" ])\n",
|
|
"\n",
|
|
" rows.append(up_row)\n",
|
|
" df_cleaned = pd.concat(rows, axis=1).T\n",
|
|
" df_cleaned.drop([\"Locataires\", \"\", \"Période\"], axis=1, inplace=True)\n",
|
|
" return df_cleaned\n",
|
|
" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "87e05f50",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def parse_above_loc(content):\n",
|
|
" row = {}\n",
|
|
" try:\n",
|
|
" app, loc = content.split(\"\\n\")\n",
|
|
" except ValueError:\n",
|
|
" row[\"lot\"] = \"\"\n",
|
|
" row[\"type\"] = \"\"\n",
|
|
" row[\"locataire\"] = content\n",
|
|
" \n",
|
|
" else:\n",
|
|
" app_ = app.split(\" \")\n",
|
|
" row[\"lot\"] = app_[1]\n",
|
|
" row[\"type\"] = \" \".join(app_[2:])\n",
|
|
" row[\"locataire\"] = loc\n",
|
|
" return pd.Series(row)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "19a1446d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "8afb23c7",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Loyers</th>\n",
|
|
" <th>Taxes</th>\n",
|
|
" <th>Provisions</th>\n",
|
|
" <th>Divers</th>\n",
|
|
" <th>Total</th>\n",
|
|
" <th>Réglés</th>\n",
|
|
" <th>Impayés</th>\n",
|
|
" <th>lot</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>locataire</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>342.80</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>663.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>1005.80</td>\n",
|
|
" <td>1005.80</td>\n",
|
|
" <td></td>\n",
|
|
" <td>0001</td>\n",
|
|
" <td>Loc. Commercial</td>\n",
|
|
" <td>EFFUSION</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>3473.79</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>519.00</td>\n",
|
|
" <td>96.79</td>\n",
|
|
" <td>4089.58</td>\n",
|
|
" <td>4089.58</td>\n",
|
|
" <td></td>\n",
|
|
" <td>0002</td>\n",
|
|
" <td>Loc. Commercial</td>\n",
|
|
" <td>RAS</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>597.20</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>31.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>628.60</td>\n",
|
|
" <td>628.60</td>\n",
|
|
" <td></td>\n",
|
|
" <td>0003</td>\n",
|
|
" <td>Appartement T1</td>\n",
|
|
" <td>KALAI Bernard</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>596.59</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>31.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>627.59</td>\n",
|
|
" <td>627.59</td>\n",
|
|
" <td></td>\n",
|
|
" <td>0004</td>\n",
|
|
" <td>Appartement T2</td>\n",
|
|
" <td>PEJAUDIER Adelaide</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>468.85</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>20.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>981.03</td>\n",
|
|
" <td>485.00</td>\n",
|
|
" <td>496.03</td>\n",
|
|
" <td>0009</td>\n",
|
|
" <td>Appartement T1</td>\n",
|
|
" <td>MANNA Baptiste</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>745.39</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>191.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>936.39</td>\n",
|
|
" <td>936.39</td>\n",
|
|
" <td></td>\n",
|
|
" <td>0005</td>\n",
|
|
" <td>Loc. Commercial</td>\n",
|
|
" <td>ATELIERS RENAISSANCE</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>834.55</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>81.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>915.55</td>\n",
|
|
" <td>915.55</td>\n",
|
|
" <td></td>\n",
|
|
" <td>0006</td>\n",
|
|
" <td>Appartement T3</td>\n",
|
|
" <td>GUELLIER MURIEL</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>591.69</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>50.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>641.69</td>\n",
|
|
" <td>641.69</td>\n",
|
|
" <td></td>\n",
|
|
" <td></td>\n",
|
|
" <td></td>\n",
|
|
" <td>Lot 0007 Appartement T1\\nDOMINIKIEWICZ\\nMELANIE</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>574.71</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>28.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>602.71</td>\n",
|
|
" <td>602.71</td>\n",
|
|
" <td></td>\n",
|
|
" <td>0008</td>\n",
|
|
" <td>Appartement T1</td>\n",
|
|
" <td>BESSON Léa</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>1201.10</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>87.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>1288.10</td>\n",
|
|
" <td>1288.10</td>\n",
|
|
" <td></td>\n",
|
|
" <td>0010</td>\n",
|
|
" <td>Appartement T3</td>\n",
|
|
" <td>FILIPPI Bérengère</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>500.46</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>28.00</td>\n",
|
|
" <td>0.00</td>\n",
|
|
" <td>528.46</td>\n",
|
|
" <td>528.46</td>\n",
|
|
" <td></td>\n",
|
|
" <td>0011</td>\n",
|
|
" <td>Appartement T1</td>\n",
|
|
" <td>LOINE Anaïs</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Loyers Taxes Provisions Divers Total Réglés Impayés lot \\\n",
|
|
"0 342.80 0.00 663.00 0.00 1005.80 1005.80 0001 \n",
|
|
"1 3473.79 0.00 519.00 96.79 4089.58 4089.58 0002 \n",
|
|
"2 597.20 0.00 31.00 0.00 628.60 628.60 0003 \n",
|
|
"3 596.59 0.00 31.00 0.00 627.59 627.59 0004 \n",
|
|
"4 468.85 0.00 20.00 0.00 981.03 485.00 496.03 0009 \n",
|
|
"5 745.39 0.00 191.00 0.00 936.39 936.39 0005 \n",
|
|
"6 834.55 0.00 81.00 0.00 915.55 915.55 0006 \n",
|
|
"7 591.69 0.00 50.00 0.00 641.69 641.69 \n",
|
|
"8 574.71 0.00 28.00 0.00 602.71 602.71 0008 \n",
|
|
"9 1201.10 0.00 87.00 0.00 1288.10 1288.10 0010 \n",
|
|
"10 500.46 0.00 28.00 0.00 528.46 528.46 0011 \n",
|
|
"\n",
|
|
" type locataire \n",
|
|
"0 Loc. Commercial EFFUSION \n",
|
|
"1 Loc. Commercial RAS \n",
|
|
"2 Appartement T1 KALAI Bernard \n",
|
|
"3 Appartement T2 PEJAUDIER Adelaide \n",
|
|
"4 Appartement T1 MANNA Baptiste \n",
|
|
"5 Loc. Commercial ATELIERS RENAISSANCE \n",
|
|
"6 Appartement T3 GUELLIER MURIEL \n",
|
|
"7 Lot 0007 Appartement T1\\nDOMINIKIEWICZ\\nMELANIE \n",
|
|
"8 Appartement T1 BESSON Léa \n",
|
|
"9 Appartement T3 FILIPPI Bérengère \n",
|
|
"10 Appartement T1 LOINE Anaïs "
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"p2 = pdf.pages[1]\n",
|
|
"extract_situation_loc(p2.extract_table())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "0e0ddca7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"charge_table_settings = {\n",
|
|
" \"vertical_strategy\": \"lines\",\n",
|
|
" \"horizontal_strategy\": \"text\",\n",
|
|
"}\n",
|
|
"def extract_charge(table):\n",
|
|
" df = pd.DataFrame(table[1:], columns=table[0]).replace(\"\", np.nan).dropna(subset=[\"Débits\"])\n",
|
|
" drop_index = df[df[\"RECAPITULATIF DES OPERATIONS\"].str.contains(\"TOTAUX\", case=False)].index\n",
|
|
" df.drop(drop_index, inplace=True)\n",
|
|
" return df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "b915b220",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th>RECAPITULATIF DES OPERATIONS</th>\n",
|
|
" <th>Débits</th>\n",
|
|
" <th>Crédits</th>\n",
|
|
" <th>Dont T.V.A.</th>\n",
|
|
" <th>Locatif</th>\n",
|
|
" <th>Déductible</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>DIDIER NETTOYAGE</td>\n",
|
|
" <td>PC - ENTRETIEN IMMEUBLE</td>\n",
|
|
" <td>708.58</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>118.10</td>\n",
|
|
" <td>708.58</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>TOTAL DIRECT ENERGIE</td>\n",
|
|
" <td>PC TOTAL DIRECT ENERGIE</td>\n",
|
|
" <td>65.70</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>7.03</td>\n",
|
|
" <td>65.70</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>EDF</td>\n",
|
|
" <td>PC EDF DU 17.04.2022</td>\n",
|
|
" <td>89.56</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>10.22</td>\n",
|
|
" <td>89.56</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>PICARD SERVICE</td>\n",
|
|
" <td>Facture du 11/04/2022</td>\n",
|
|
" <td>66.76</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>6.07</td>\n",
|
|
" <td>66.76</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>V2C MAINTENANCE</td>\n",
|
|
" <td>6 - remplacement circulateur chudière</td>\n",
|
|
" <td>447.70</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>40.70</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>447.70</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>IMI GERANCE</td>\n",
|
|
" <td>TAVARES NORTE Dylan</td>\n",
|
|
" <td>93.00</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>15.50</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>93.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>IMI GERANCE</td>\n",
|
|
" <td>TAVARES NORTE Dylan</td>\n",
|
|
" <td>173.58</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>28.93</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>173.58</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>IMI GERANCE</td>\n",
|
|
" <td>TAVARES NORTE Dylan</td>\n",
|
|
" <td>798.72</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>133.12</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>798.72</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>29</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>Honoraires H.T.</td>\n",
|
|
" <td>979.20</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>979.20</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>30</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>TVA/Honoraires ( 20.00 % )</td>\n",
|
|
" <td>195.84</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>195.84</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>195.84</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" RECAPITULATIF DES OPERATIONS Débits \\\n",
|
|
"4 DIDIER NETTOYAGE PC - ENTRETIEN IMMEUBLE 708.58 \n",
|
|
"6 TOTAL DIRECT ENERGIE PC TOTAL DIRECT ENERGIE 65.70 \n",
|
|
"7 EDF PC EDF DU 17.04.2022 89.56 \n",
|
|
"9 PICARD SERVICE Facture du 11/04/2022 66.76 \n",
|
|
"15 V2C MAINTENANCE 6 - remplacement circulateur chudière 447.70 \n",
|
|
"20 IMI GERANCE TAVARES NORTE Dylan 93.00 \n",
|
|
"23 IMI GERANCE TAVARES NORTE Dylan 173.58 \n",
|
|
"27 IMI GERANCE TAVARES NORTE Dylan 798.72 \n",
|
|
"29 NaN Honoraires H.T. 979.20 \n",
|
|
"30 NaN TVA/Honoraires ( 20.00 % ) 195.84 \n",
|
|
"\n",
|
|
" Crédits Dont T.V.A. Locatif Déductible \n",
|
|
"4 NaN 118.10 708.58 NaN \n",
|
|
"6 NaN 7.03 65.70 NaN \n",
|
|
"7 NaN 10.22 89.56 NaN \n",
|
|
"9 NaN 6.07 66.76 NaN \n",
|
|
"15 NaN 40.70 NaN 447.70 \n",
|
|
"20 NaN 15.50 NaN 93.00 \n",
|
|
"23 NaN 28.93 NaN 173.58 \n",
|
|
"27 NaN 133.12 NaN 798.72 \n",
|
|
"29 NaN NaN NaN 979.20 \n",
|
|
"30 NaN 195.84 NaN 195.84 "
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"p4 = pdf.pages[3]\n",
|
|
"extract_charge(p4.extract_table(charge_table_settings))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "c7b071fa",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# im = p4.to_image()\n",
|
|
"# im.debug_tablefinder(charge_table_settings)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "ebe2881a",
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"ename": "ModuleNotFoundError",
|
|
"evalue": "No module named 'openpyxl'",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn [12], line 15\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHONORAIRES\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m page\u001b[38;5;241m.\u001b[39mextract_text():\n\u001b[1;32m 13\u001b[0m df_charge \u001b[38;5;241m=\u001b[39m extract_charge(page\u001b[38;5;241m.\u001b[39mextract_table(charge_table_settings))\n\u001b[0;32m---> 15\u001b[0m df_charge\u001b[38;5;241m.\u001b[39mto_excel(xls_charge, sheet_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCharges\u001b[39m\u001b[38;5;124m\"\u001b[39m, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[1;32m 18\u001b[0m df_loc \u001b[38;5;241m=\u001b[39m extract_situation_loc(loc_table)\n\u001b[1;32m 19\u001b[0m df_loc \u001b[38;5;241m=\u001b[39m df_loc\u001b[38;5;241m.\u001b[39massign(\n\u001b[1;32m 20\u001b[0m mois \u001b[38;5;241m=\u001b[39m mois,\n\u001b[1;32m 21\u001b[0m annee \u001b[38;5;241m=\u001b[39m annee\n\u001b[1;32m 22\u001b[0m )\n",
|
|
"File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/util/_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 210\u001b[0m kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/util/_decorators.py:211\u001b[0m, in \u001b[0;36mdeprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 210\u001b[0m kwargs[new_arg_name] \u001b[38;5;241m=\u001b[39m new_arg_value\n\u001b[0;32m--> 211\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/core/generic.py:2373\u001b[0m, in \u001b[0;36mNDFrame.to_excel\u001b[0;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes, storage_options)\u001b[0m\n\u001b[1;32m 2360\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexcel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ExcelFormatter\n\u001b[1;32m 2362\u001b[0m formatter \u001b[38;5;241m=\u001b[39m ExcelFormatter(\n\u001b[1;32m 2363\u001b[0m df,\n\u001b[1;32m 2364\u001b[0m na_rep\u001b[38;5;241m=\u001b[39mna_rep,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2371\u001b[0m inf_rep\u001b[38;5;241m=\u001b[39minf_rep,\n\u001b[1;32m 2372\u001b[0m )\n\u001b[0;32m-> 2373\u001b[0m \u001b[43mformatter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2374\u001b[0m \u001b[43m \u001b[49m\u001b[43mexcel_writer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2375\u001b[0m \u001b[43m \u001b[49m\u001b[43msheet_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msheet_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2376\u001b[0m \u001b[43m \u001b[49m\u001b[43mstartrow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstartrow\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2377\u001b[0m \u001b[43m \u001b[49m\u001b[43mstartcol\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstartcol\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2378\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreeze_panes\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreeze_panes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2379\u001b[0m \u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2380\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2381\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/io/formats/excel.py:911\u001b[0m, in \u001b[0;36mExcelFormatter.write\u001b[0;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options)\u001b[0m\n\u001b[1;32m 907\u001b[0m need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 908\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 909\u001b[0m \u001b[38;5;66;03m# error: Cannot instantiate abstract class 'ExcelWriter' with abstract\u001b[39;00m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;66;03m# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'\u001b[39;00m\n\u001b[0;32m--> 911\u001b[0m writer \u001b[38;5;241m=\u001b[39m \u001b[43mExcelWriter\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[abstract]\u001b[39;49;00m\n\u001b[1;32m 912\u001b[0m \u001b[43m \u001b[49m\u001b[43mwriter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mengine\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_options\u001b[49m\n\u001b[1;32m 913\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 914\u001b[0m need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 916\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
|
|
"File \u001b[0;32m~/.venv/plesna/lib/python3.10/site-packages/pandas/io/excel/_openpyxl.py:56\u001b[0m, in \u001b[0;36mOpenpyxlWriter.__init__\u001b[0;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 45\u001b[0m path: FilePath \u001b[38;5;241m|\u001b[39m WriteExcelBuffer \u001b[38;5;241m|\u001b[39m ExcelWriter,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 54\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# Use the openpyxl module as the Excel writer.\u001b[39;00m\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopenpyxl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mworkbook\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Workbook\n\u001b[1;32m 58\u001b[0m engine_kwargs \u001b[38;5;241m=\u001b[39m combine_kwargs(engine_kwargs, kwargs)\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 61\u001b[0m path,\n\u001b[1;32m 62\u001b[0m mode\u001b[38;5;241m=\u001b[39mmode,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 65\u001b[0m engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[1;32m 66\u001b[0m )\n",
|
|
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'openpyxl'"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"frames = []\n",
|
|
"loc_table = []\n",
|
|
"for page in pdf.pages:\n",
|
|
" situation_loc_line = [l for l in page.extract_text().split(\"\\n\") if \"SITUATION DES LOCATAIRES CRG\" in l]\n",
|
|
" if situation_loc_line:\n",
|
|
" mois, annee = situation_loc_line[0].split(\" \")[-2:]\n",
|
|
" if loc_table:\n",
|
|
" loc_table += page.extract_table()[1:]\n",
|
|
" else:\n",
|
|
" loc_table = page.extract_table()\n",
|
|
"\n",
|
|
" if \"HONORAIRES\" in page.extract_text():\n",
|
|
" df_charge = extract_charge(page.extract_table(charge_table_settings))\n",
|
|
"\n",
|
|
" df_charge.to_excel(xls_charge, sheet_name=\"Charges\", index=False)\n",
|
|
"\n",
|
|
"\n",
|
|
"df_loc = extract_situation_loc(loc_table)\n",
|
|
"df_loc = df_loc.assign(\n",
|
|
" mois = mois,\n",
|
|
" annee = annee\n",
|
|
")\n",
|
|
"df_loc.to_excel(xls_locataire, sheet_name=\"Location\", index=False)\n",
|
|
"#df_loc"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f2e22a94",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "dad54ca3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|