{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "84d2d916-7061-477f-8b97-6dcb924a8306", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from pathlib import Path" ] }, { "cell_type": "code", "execution_count": 2, "id": "a41b710d-f016-4f3c-923b-cc1d5949cf1e", "metadata": {}, "outputs": [], "source": [ "staging_path = Path(\"../PLESNA Compta SYSTEM/staging/\")\n", "staging_columns = [\"Régie\",\"Immeuble\",\"Porte\",\"Lot\",\"Année\",\"Mois\",\"Catégorie\",\"Fournisseur\",\"Libellé\",\"Débit\",\"Crédit\",\"Impact\"]" ] }, { "cell_type": "code", "execution_count": 3, "id": "6992dcd7-b90d-4e57-9211-8a867071f83c", "metadata": {}, "outputs": [], "source": [ "staging_files = set()" ] }, { "cell_type": "code", "execution_count": 4, "id": "04139774-9a6d-472f-80e6-27415d3499b4", "metadata": {}, "outputs": [], "source": [ "def to_csv(df, dest):\n", " if dest.exists():\n", " df.to_csv(dest, mode=\"a\", header=False, index=False)\n", " else:\n", " df.to_csv(dest, index=False)" ] }, { "cell_type": "code", "execution_count": 5, "id": "41806d70-cb2d-4f0b-82e8-f647d37471bc", "metadata": {}, "outputs": [], "source": [ "for f in staging_path.glob(\"**/*.csv\"):\n", " f.unlink()" ] }, { "cell_type": "markdown", "id": "c8703ffa-fc39-4618-8998-87717c6c1e2f", "metadata": {}, "source": [ "# Import history CRG\n", "\n", "Le but de cette partie est d'importer les anciens CRG et de les adapter au format actuel." ] }, { "cell_type": "code", "execution_count": 6, "id": "b4decfa2-3394-40e8-b012-2a9dc354d697", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "raw_path = Path(\"../PLESNA Compta SYSTEM/raw/CRG/\")\n", "assert raw_path.exists()\n", "list(raw_path.glob(\"*/**\"))" ] }, { "cell_type": "markdown", "id": "8ec11543-d858-4c85-864a-ccbab830ef67", "metadata": {}, "source": [ "## Import de `2019 et avant.xlsx`\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "c9167f6a-a622-41d7-b2af-a70c7ac08f70", "metadata": {}, "outputs": [], "source": [ "file = raw_path/\"2019 et avant.xlsx\"\n", "assert file.exists()\n", "df = pd.read_excel(file, sheet_name=\"IMI Gérence\", )\n", " #parse_dates = [\"Date\"], date_format=\"%Y-%m%d\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "981884df-0b8d-43a1-b140-1fdf2e2c0b25", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Caution', 'Solde Comptable', 'Xfert entre compa immeubles']\n" ] }, { "data": { "text/plain": [ "{'Ascenseur': 'Ascenseur',\n", " 'Charge Remboursement': 'Loyer Charge',\n", " 'contrat assurance': 'Assurance',\n", " 'contrat entretien': 'Entretien',\n", " 'diagnostics': 'Diagnotics',\n", " 'divers (plaques…)': 'Travaux',\n", " 'Elec': 'Elec',\n", " 'honor location': 'Hono Gestion',\n", " 'honor EDL': 'Hono E/S',\n", " 'honor gestion': 'Hono Gestion',\n", " 'honor location': 'Hono E/S',\n", " 'honor remise': 'Hono Gestion',\n", " 'Loyer + Charges': 'Loyer Charge',\n", " 'Tel': 'Tel',\n", " 'travaux': 'Travaux'}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cat = pd.read_excel(file, sheet_name=\"Catégorie Mapping\")\n", "cat_drop = list(cat[cat[\"Nouvelles\"]==\"NE PAS IMPORTER\"][\"Anciennes\"])\n", "print(cat_drop)\n", "cat_trans = cat[cat[\"Nouvelles\"]!=\"NE PAS IMPORTER\"]\n", "trans = {}\n", "for _, (old, new) in cat_trans.iterrows():\n", " trans[old] = new\n", "trans" ] }, { "cell_type": "code", "execution_count": 9, "id": "369b39ff-ad65-44b9-b7d7-ece13e173059", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RégieImmeublePorteDateCatégorieLibelléDébitCrédit
0Imi GéranceS52017-05-31Loyer + ChargesRègl. Loyer 06/2017NaN720.00
1Imi GéranceS52017-05-31Loyer + ChargesRègl. Prov. Char 06/2017NaN191.00
2Imi GéranceS12017-06-01Loyer + ChargesRègl. Prov. Char 04 à 06/2017NaN633.28
3Imi GéranceS42017-06-01Loyer + ChargesRègl. Loyer 06/2017NaN576.00
4Imi GéranceS42017-06-01Loyer + ChargesRègl. Prov. Char 06/2017NaN31.00
\n", "
" ], "text/plain": [ " Régie Immeuble Porte Date Catégorie \\\n", "0 Imi Gérance S 5 2017-05-31 Loyer + Charges \n", "1 Imi Gérance S 5 2017-05-31 Loyer + Charges \n", "2 Imi Gérance S 1 2017-06-01 Loyer + Charges \n", "3 Imi Gérance S 4 2017-06-01 Loyer + Charges \n", "4 Imi Gérance S 4 2017-06-01 Loyer + Charges \n", "\n", " Libellé Débit Crédit \n", "0 Règl. Loyer 06/2017 NaN 720.00 \n", "1 Règl. Prov. Char 06/2017 NaN 191.00 \n", "2 Règl. Prov. Char 04 à 06/2017 NaN 633.28 \n", "3 Règl. Loyer 06/2017 NaN 576.00 \n", "4 Règl. Prov. Char 06/2017 NaN 31.00 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 10, "id": "12358a6b-e563-44be-b6b2-35c5e5d1d35b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Régie object\n", "Immeuble object\n", "Porte object\n", "Date datetime64[ns]\n", "Catégorie object\n", "Libellé object\n", "Débit float64\n", "Crédit float64\n", "dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.dtypes" ] }, { "cell_type": "markdown", "id": "76a5d178-6f0c-4497-847f-2f318800dbfb", "metadata": {}, "source": [ "Filter lines" ] }, { "cell_type": "code", "execution_count": 11, "id": "8361bb7b-c422-4da6-8b3b-e43f50d4a1ec", "metadata": {}, "outputs": [], "source": [ "df = df[~df[\"Catégorie\"].isin(cat_drop)]" ] }, { "cell_type": "markdown", "id": "6f95a6ee-6d0b-4417-b2c2-60faa4847e4f", "metadata": {}, "source": [ "Featuring" ] }, { "cell_type": "code", "execution_count": 12, "id": "7caa1b95-c321-4f8e-9f9b-c7e867e23921", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RégieImmeublePorteDateCatégorieLibelléDébitCréditLotAnnéeMoisFournisseurImpact
0Imi GéranceS52017-05-31Loyer ChargeRègl. Loyer 06/20170.0720.00S05201705720.00
1Imi GéranceS52017-05-31Loyer ChargeRègl. Prov. Char 06/20170.0191.00S05201705191.00
2Imi GéranceS12017-06-01Loyer ChargeRègl. Prov. Char 04 à 06/20170.0633.28S01201706633.28
3Imi GéranceS42017-06-01Loyer ChargeRègl. Loyer 06/20170.0576.00S04201706576.00
4Imi GéranceS42017-06-01Loyer ChargeRègl. Prov. Char 06/20170.031.00S0420170631.00
\n", "
" ], "text/plain": [ " Régie Immeuble Porte Date Catégorie \\\n", "0 Imi Gérance S 5 2017-05-31 Loyer Charge \n", "1 Imi Gérance S 5 2017-05-31 Loyer Charge \n", "2 Imi Gérance S 1 2017-06-01 Loyer Charge \n", "3 Imi Gérance S 4 2017-06-01 Loyer Charge \n", "4 Imi Gérance S 4 2017-06-01 Loyer Charge \n", "\n", " Libellé Débit Crédit Lot Année Mois Fournisseur \\\n", "0 Règl. Loyer 06/2017 0.0 720.00 S05 2017 05 \n", "1 Règl. Prov. Char 06/2017 0.0 191.00 S05 2017 05 \n", "2 Règl. Prov. Char 04 à 06/2017 0.0 633.28 S01 2017 06 \n", "3 Règl. Loyer 06/2017 0.0 576.00 S04 2017 06 \n", "4 Règl. Prov. Char 06/2017 0.0 31.00 S04 2017 06 \n", "\n", " Impact \n", "0 720.00 \n", "1 191.00 \n", "2 633.28 \n", "3 576.00 \n", "4 31.00 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.assign(\n", " Débit = df[\"Débit\"].fillna(0),\n", " Crédit = df[\"Crédit\"].fillna(0),\n", " Lot = df[\"Immeuble\"].astype(str)+df[\"Porte\"].astype(\"str\").str.zfill(2),\n", " Année = df[\"Date\"].astype(str).str.slice(0,4),\n", " Mois = df[\"Date\"].astype(str).str.slice(5,7),\n", " Catégorie = df[\"Catégorie\"].replace(trans),\n", " Fournisseur = \"\",\n", ")\n", "df = df.assign(\n", " Impact = df[\"Crédit\"] - df[\"Débit\"],\n", ")\n", "df.head()" ] }, { "cell_type": "markdown", "id": "b1951ea6-6e49-41b3-9c2f-92328e9d76ed", "metadata": {}, "source": [ "Verify columns and select thoses" ] }, { "cell_type": "code", "execution_count": 13, "id": "a9c243b5-6d66-4113-8ddd-2c35f6a8d8d2", "metadata": {}, "outputs": [], "source": [ "df = df[staging_columns]" ] }, { "cell_type": "code", "execution_count": 14, "id": "c90216b7-fa62-46d9-b93e-0b7626d40832", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../PLESNA Compta SYSTEM/staging/CRG/2017.csv\n", "../PLESNA Compta SYSTEM/staging/CRG/2018.csv\n", "../PLESNA Compta SYSTEM/staging/CRG/2019.csv\n", "../PLESNA Compta SYSTEM/staging/CRG/2020.csv\n" ] } ], "source": [ "\n", "for year in df[\"Année\"].unique():\n", " df_year = df[df[\"Année\"]==year]\n", " dest = staging_path/ f\"CRG/{year}.csv\"\n", " print(dest)\n", " to_csv(df_year, dest)\n", " staging_files.add(dest)" ] }, { "cell_type": "code", "execution_count": null, "id": "786d00dd-3fc6-4bd6-9a05-bf9db16eb5be", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "c22a8c5f-4fad-4b7e-bcfa-b058cbd42f8f", "metadata": {}, "source": [ "## Import `2020 2022.xslx`" ] }, { "cell_type": "code", "execution_count": 15, "id": "ac33c438-f155-450c-b805-48282e5b1f67", "metadata": {}, "outputs": [], "source": [ "file = raw_path/\"2020 2022.xlsx\"\n", "assert file.exists()\n", "df = pd.read_excel(file, sheet_name=\"DB CRG\", )" ] }, { "cell_type": "code", "execution_count": 16, "id": "df0f36e3-ee81-4195-bbb3-f9307294440f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['caution', 'Solde Comptable', 'Xfert entre compa immeubles', 'Xfert Tréso']\n" ] }, { "data": { "text/plain": [ "{'contentieux': 'Contentieux',\n", " 'contrat ascenseur': 'Ascenseur',\n", " 'contrat assurance': 'Assurance',\n", " 'contrat assurance juridique': 'Assurance',\n", " 'contrat entretien': 'Entretien',\n", " 'diagnostics': 'Diagnotics',\n", " 'divers': 'Travaux',\n", " 'divers (plaques…)': 'Travaux',\n", " 'eau': 'Eau',\n", " 'eau ': 'Eau',\n", " 'électricité': 'Elec',\n", " 'honor location': 'Hono Gestion',\n", " 'honor divers': 'Hono Gestion',\n", " 'honor EDL': 'Hono E/S',\n", " 'honor edl ': 'Hono E/S',\n", " 'honor gestion': 'Hono Gestion',\n", " 'honor location': 'Hono E/S',\n", " 'honor remise': 'Hono Gestion',\n", " 'Honoraire Gestion': 'Hono Gestion',\n", " 'loyer+charge': 'Loyer Charge',\n", " 'travaux': 'Travaux'}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cat = pd.read_excel(file, sheet_name=\"Catégories\")\n", "cat_drop = list(cat[cat[\"Nouvelles\"]==\"NE PAS IMPORTER\"][\"Anciennes\"])\n", "print(cat_drop)\n", "cat_trans = cat[cat[\"Nouvelles\"]!=\"NE PAS IMPORTER\"]\n", "trans = {}\n", "for _, (old, new) in cat_trans.iterrows():\n", " trans[old] = new\n", "trans" ] }, { "cell_type": "code", "execution_count": 17, "id": "a177e08d-8ba3-4bbb-ac50-ef52cd047137", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RégieimmeubleporteDateCategorieLibelléDébitCréditRéserveAnnéeMoisTrimestreLotImpact
01.0IMIGéranceS52017-05-31loyer+chargeRègl. Loyer 06/2017NaN720.00NaN201752S05-720.00
12.0IMIGéranceS52017-05-31loyer+chargeRègl. Prov. Char 06/2017NaN191.00NaN201752S05-191.00
23.0IMIGéranceS12017-06-01loyer+chargeRègl. Prov. Char 04 à 06/2017NaN633.28NaN201762S01-633.28
34.0IMIGéranceS42017-06-01loyer+chargeRègl. Loyer 06/2017NaN576.00NaN201762S04-576.00
45.0IMIGéranceS42017-06-01loyer+chargeRègl. Prov. Char 06/2017NaN31.00NaN201762S04-31.00
\n", "
" ], "text/plain": [ " N° Régie immeuble porte Date Categorie \\\n", "0 1.0 IMIGérance S 5 2017-05-31 loyer+charge \n", "1 2.0 IMIGérance S 5 2017-05-31 loyer+charge \n", "2 3.0 IMIGérance S 1 2017-06-01 loyer+charge \n", "3 4.0 IMIGérance S 4 2017-06-01 loyer+charge \n", "4 5.0 IMIGérance S 4 2017-06-01 loyer+charge \n", "\n", " Libellé Débit Crédit Réserve Année Mois \\\n", "0 Règl. Loyer 06/2017 NaN 720.00 NaN 2017 5 \n", "1 Règl. Prov. Char 06/2017 NaN 191.00 NaN 2017 5 \n", "2 Règl. Prov. Char 04 à 06/2017 NaN 633.28 NaN 2017 6 \n", "3 Règl. Loyer 06/2017 NaN 576.00 NaN 2017 6 \n", "4 Règl. Prov. Char 06/2017 NaN 31.00 NaN 2017 6 \n", "\n", " Trimestre Lot Impact \n", "0 2 S05 -720.00 \n", "1 2 S05 -191.00 \n", "2 2 S01 -633.28 \n", "3 2 S04 -576.00 \n", "4 2 S04 -31.00 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 18, "id": "eac9a790-08f0-4e75-b5dd-087496949c71", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RégieimmeubleporteDateCategorieLibelléDébitCréditRéserveAnnéeMoisTrimestreLotImpactImmeublePorteCatégorieFournisseur
01.0IMIGéranceS52017-05-31loyer+chargeRègl. Loyer 06/20170.0720.00NaN2017052S05NaNS5Loyer Charge
12.0IMIGéranceS52017-05-31loyer+chargeRègl. Prov. Char 06/20170.0191.00NaN2017052S05NaNS5Loyer Charge
23.0IMIGéranceS12017-06-01loyer+chargeRègl. Prov. Char 04 à 06/20170.0633.28NaN2017062S01NaNS1Loyer Charge
34.0IMIGéranceS42017-06-01loyer+chargeRègl. Loyer 06/20170.0576.00NaN2017062S04NaNS4Loyer Charge
45.0IMIGéranceS42017-06-01loyer+chargeRègl. Prov. Char 06/20170.031.00NaN2017062S04NaNS4Loyer Charge
\n", "
" ], "text/plain": [ " N° Régie immeuble porte Date Categorie \\\n", "0 1.0 IMIGérance S 5 2017-05-31 loyer+charge \n", "1 2.0 IMIGérance S 5 2017-05-31 loyer+charge \n", "2 3.0 IMIGérance S 1 2017-06-01 loyer+charge \n", "3 4.0 IMIGérance S 4 2017-06-01 loyer+charge \n", "4 5.0 IMIGérance S 4 2017-06-01 loyer+charge \n", "\n", " Libellé Débit Crédit Réserve Année Mois \\\n", "0 Règl. Loyer 06/2017 0.0 720.00 NaN 2017 05 \n", "1 Règl. Prov. Char 06/2017 0.0 191.00 NaN 2017 05 \n", "2 Règl. Prov. Char 04 à 06/2017 0.0 633.28 NaN 2017 06 \n", "3 Règl. Loyer 06/2017 0.0 576.00 NaN 2017 06 \n", "4 Règl. Prov. Char 06/2017 0.0 31.00 NaN 2017 06 \n", "\n", " Trimestre Lot Impact Immeuble Porte Catégorie Fournisseur \n", "0 2 S05 NaN S 5 Loyer Charge \n", "1 2 S05 NaN S 5 Loyer Charge \n", "2 2 S01 NaN S 1 Loyer Charge \n", "3 2 S04 NaN S 4 Loyer Charge \n", "4 2 S04 NaN S 4 Loyer Charge " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.assign(\n", " Débit = df[\"Débit\"].fillna(0),\n", " Immeuble = df[\"immeuble\"],\n", " Porte = df[\"porte\"],\n", " Crédit = df[\"Crédit\"].fillna(0),\n", " Lot = df[\"immeuble\"].astype(str)+df[\"porte\"].astype(\"str\").str.zfill(2),\n", " Année = df[\"Date\"].astype(str).str.slice(0,4),\n", " Mois = df[\"Date\"].astype(str).str.slice(5,7),\n", " Impact = df[\"Crédit\"] - df[\"Débit\"],\n", " Catégorie = df[\"Categorie\"].replace(trans),\n", " Fournisseur = \"\",\n", ")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 19, "id": "0b0bd55e-ff4d-49be-9e77-8b47d8c3a5cd", "metadata": {}, "outputs": [], "source": [ "df = df[staging_columns]" ] }, { "cell_type": "code", "execution_count": 20, "id": "49d216c9-ad59-4db3-ba47-eec840ae53d4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../PLESNA Compta SYSTEM/staging/CRG/2017.csv\n", "../PLESNA Compta SYSTEM/staging/CRG/2018.csv\n", "../PLESNA Compta SYSTEM/staging/CRG/2019.csv\n", "../PLESNA Compta SYSTEM/staging/CRG/2020.csv\n", "../PLESNA Compta SYSTEM/staging/CRG/2021.csv\n", "../PLESNA Compta SYSTEM/staging/CRG/2022.csv\n" ] } ], "source": [ "\n", "for year in df[\"Année\"].unique():\n", " df_year = df[df[\"Année\"]==year]\n", " dest = staging_path/ f\"CRG/{year}.csv\"\n", " print(dest)\n", " to_csv(df_year, dest)\n", " staging_files.add(dest)" ] }, { "cell_type": "markdown", "id": "84d4bea9-59e6-4ca3-865e-5add5a992913", "metadata": {}, "source": [ "## Import de `2023`" ] }, { "cell_type": "code", "execution_count": 21, "id": "5815f53e-e665-4caa-a510-f2497c7ca16c", "metadata": {}, "outputs": [], "source": [ "file = raw_path/\"2023.xlsx\"\n", "assert file.exists()\n", "df = pd.read_excel(file, sheet_name=\"DB CRG 2023 ...\", )\n", "year = 2023" ] }, { "cell_type": "code", "execution_count": 22, "id": "1ff99c68-8bcb-4957-a359-a7ebb4dce337", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RégieImmeublePorteMoisCatégorieFournisseurLibelléDébitCréditLotAnnéeImpact
0GelasB*1EntretienPOEZEVARA NETTOYAGEFACTURE ENTRETIEN IMMEUBLE PC N° FC61954491.170.0B0*2023-491.17
1GelasB*1Hono GestionRosierHonoraires H.T.699.100.0B0*2023-699.10
2GelasB*1Hono GestionRosierTVA/Honoraires ( 20.00 % )139.820.0B0*2023-139.82
3GelasM*1EntretienREMALI BRIL'ORPC ENTRETIEN - DECEMBRE 22363.770.0M0*2023-363.77
4GelasM*1EntretienASTECMARIETTON ENTRETIEN 1T23453.790.0M0*2023-453.79
\n", "
" ], "text/plain": [ " Régie Immeuble Porte Mois Catégorie Fournisseur \\\n", "0 Gelas B * 1 Entretien POEZEVARA NETTOYAGE \n", "1 Gelas B * 1 Hono Gestion Rosier \n", "2 Gelas B * 1 Hono Gestion Rosier \n", "3 Gelas M * 1 Entretien REMALI BRIL'OR \n", "4 Gelas M * 1 Entretien ASTEC \n", "\n", " Libellé Débit Crédit Lot Année \\\n", "0 FACTURE ENTRETIEN IMMEUBLE PC N° FC61954 491.17 0.0 B0* 2023 \n", "1 Honoraires H.T. 699.10 0.0 B0* 2023 \n", "2 TVA/Honoraires ( 20.00 % ) 139.82 0.0 B0* 2023 \n", "3 PC ENTRETIEN - DECEMBRE 22 363.77 0.0 M0* 2023 \n", "4 MARIETTON ENTRETIEN 1T23 453.79 0.0 M0* 2023 \n", "\n", " Impact \n", "0 -491.17 \n", "1 -699.10 \n", "2 -139.82 \n", "3 -363.77 \n", "4 -453.79 " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df.assign(\n", " Débit = df[\"Débit\"].fillna(0),\n", " Crédit = df[\"Crédit\"].fillna(0),\n", " Lot = df[\"Immeuble\"].astype(str)+df[\"Porte\"].astype(\"str\").str.zfill(2),\n", " Année = year,\n", ")\n", "df = df.assign(\n", " Impact = df[\"Crédit\"] - df[\"Débit\"],\n", ")\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 23, "id": "47284150-a63d-4427-9c7c-8ed5136df1f1", "metadata": {}, "outputs": [], "source": [ "df = df[staging_columns]" ] }, { "cell_type": "code", "execution_count": 24, "id": "4627682f-a5a9-4ade-8dee-113a2399f85e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../PLESNA Compta SYSTEM/staging/CRG/2023.csv\n" ] } ], "source": [ "\n", "dest = staging_path/ f\"CRG/{year}.csv\"\n", "print(dest)\n", "to_csv(df_year, dest)\n", "staging_files.add(dest)" ] }, { "cell_type": "markdown", "id": "1e4bbd3a-f77d-4b16-bdf5-f2532c4227a5", "metadata": {}, "source": [ "# Import de l'historique de banque" ] }, { "cell_type": "code", "execution_count": 25, "id": "7da38abe-5382-4366-9824-09e0dd2a02d6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[PosixPath('../PLESNA Compta SYSTEM/raw/Banque/Histoire depuis 2020.xlsx')]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "banque_path = Path(\"../PLESNA Compta SYSTEM/raw/Banque/\")\n", "assert raw_path.exists()\n", "list(banque_path.glob(\"*\"))" ] }, { "cell_type": "code", "execution_count": 26, "id": "3f6d380a-35f8-4afb-b455-76f07a06993f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Banque', 'Immeuble', 'Porte', 'Lot', 'date', 'Année', 'Mois',\n", " 'Catégorie', 'Libellé', 'Débit', 'Crédit', 'Impact'],\n", " dtype='object')" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "schema_banque = staging_path / \"Banque/Schema.xlsx\"\n", "assert schema_banque.exists()\n", "banque_columns = pd.read_excel(schema_banque, sheet_name=\"Schema\").columns\n", "banque_columns" ] }, { "cell_type": "markdown", "id": "0bb2e54f-6968-4f07-8379-22dd973e71d8", "metadata": {}, "source": [ "## Import de `Histoire ...`" ] }, { "cell_type": "code", "execution_count": 27, "id": "a751b591-66c0-4c8c-9d5c-c8a51adb1e69", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ImmeublePortedatecatégorielibelléDEBITCREDITBANQUEAnnéeMoisLotImpactCMSLBNPSolde
02020NaNNaNNaTNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
11NaNNaN2020-01-15Frais BancaireNaN14.1NaNCM0.00.00.00.00.00.00.00.0
22SNaN2020-01-15TFImpots1570.0NaNCM0.00.00.00.00.00.00.00.0
33MNaN2020-01-15TFImpots1209.0NaNCM0.00.00.00.00.00.00.00.0
44BNaN2020-01-15TFImpots905.0NaNCM0.00.00.00.00.00.00.00.0
......................................................
3750MNaN2022-12-24Revenue GéranceVIR SEPANaN10118.86BNP0.00.00.00.00.00.00.00.0
3760BNaN2022-12-24Revenue GéranceVIR SEPANaN10343.82BNP0.00.00.00.00.00.00.00.0
3770NaNNaN2022-12-28Frais FinancierECHEANCE PRET 01383 609349271250.0NaNBNP0.00.00.00.00.00.00.00.0
3780NaNNaN2022-12-30GerantTNS Gerant A. Bertrand Aout1000.0NaNBNP0.00.00.00.00.00.00.00.0
3790NaNNaN2022-12-30ComptableNaN660.0NaNBNP0.00.00.00.00.00.00.00.0
\n", "

380 rows × 17 columns

\n", "
" ], "text/plain": [ " N° Immeuble Porte date catégorie \\\n", "0 2020 NaN NaN NaT NaN \n", "1 1 NaN NaN 2020-01-15 Frais Bancaire \n", "2 2 S NaN 2020-01-15 TF \n", "3 3 M NaN 2020-01-15 TF \n", "4 4 B NaN 2020-01-15 TF \n", ".. ... ... ... ... ... \n", "375 0 M NaN 2022-12-24 Revenue Gérance \n", "376 0 B NaN 2022-12-24 Revenue Gérance \n", "377 0 NaN NaN 2022-12-28 Frais Financier \n", "378 0 NaN NaN 2022-12-30 Gerant \n", "379 0 NaN NaN 2022-12-30 Comptable \n", "\n", " libellé DEBIT CREDIT BANQUE Année Mois Lot \\\n", "0 NaN NaN NaN NaN NaN NaN NaN \n", "1 NaN 14.1 NaN CM 0.0 0.0 0.0 \n", "2 Impots 1570.0 NaN CM 0.0 0.0 0.0 \n", "3 Impots 1209.0 NaN CM 0.0 0.0 0.0 \n", "4 Impots 905.0 NaN CM 0.0 0.0 0.0 \n", ".. ... ... ... ... ... ... ... \n", "375 VIR SEPA NaN 10118.86 BNP 0.0 0.0 0.0 \n", "376 VIR SEPA NaN 10343.82 BNP 0.0 0.0 0.0 \n", "377 ECHEANCE PRET 01383 60934927 1250.0 NaN BNP 0.0 0.0 0.0 \n", "378 TNS Gerant A. Bertrand Aout 1000.0 NaN BNP 0.0 0.0 0.0 \n", "379 NaN 660.0 NaN BNP 0.0 0.0 0.0 \n", "\n", " Impact CM SL BNP Solde \n", "0 NaN NaN NaN NaN NaN \n", "1 0.0 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 0.0 \n", ".. ... ... ... ... ... \n", "375 0.0 0.0 0.0 0.0 0.0 \n", "376 0.0 0.0 0.0 0.0 0.0 \n", "377 0.0 0.0 0.0 0.0 0.0 \n", "378 0.0 0.0 0.0 0.0 0.0 \n", "379 0.0 0.0 0.0 0.0 0.0 \n", "\n", "[380 rows x 17 columns]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "file = banque_path/\"Histoire depuis 2020.xlsx\"\n", "assert file.exists()\n", "df = pd.read_excel(file, skiprows=2)\n", "df" ] }, { "cell_type": "code", "execution_count": 28, "id": "9cf1be24-0807-431f-9b1e-98b95a2a1689", "metadata": {}, "outputs": [], "source": [ "# Bad line clean\n", "df = df.dropna(subset=[\"Immeuble\", \"Porte\", \"date\"], how=\"all\")" ] }, { "cell_type": "code", "execution_count": 29, "id": "c6cb1ad7-bb9e-4196-beb4-ac096070a090", "metadata": {}, "outputs": [], "source": [ "df = df.assign(\n", " Banque = df[\"BANQUE\"],\n", " Catégorie = df[\"catégorie\"],\n", " Libellé = df[\"libellé\"],\n", " Débit = df[\"DEBIT\"].fillna(0),\n", " Crédit = df[\"CREDIT\"].fillna(0),\n", " Année = df[\"date\"].astype(str).str.slice(0,4),\n", " Mois = df[\"date\"].astype(str).str.slice(5,7),\n", ")\n", "df = df.assign(\n", " Impact = df[\"Crédit\"] - df[\"Débit\"],\n", ")\n" ] }, { "cell_type": "code", "execution_count": 30, "id": "d73cdaf2-72c3-4c90-ba2d-32d7377b5aee", "metadata": {}, "outputs": [], "source": [ "df = df[banque_columns]" ] }, { "cell_type": "code", "execution_count": 31, "id": "e58c891e-c91a-4832-bb03-43d2e11cc985", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BanqueImmeublePorteLotdateAnnéeMoisCatégorieLibelléDébitCréditImpact
1CMNaNNaN0.02020-01-15202001Frais BancaireNaN14.10.0-14.1
2CMSNaN0.02020-01-15202001TFImpots1570.00.0-1570.0
3CMMNaN0.02020-01-15202001TFImpots1209.00.0-1209.0
4CMBNaN0.02020-01-15202001TFImpots905.00.0-905.0
5CMNaNNaN0.02020-01-15202001Frais FinancierPrêts CM 1,8M€5715.00.0-5715.0
\n", "
" ], "text/plain": [ " Banque Immeuble Porte Lot date Année Mois Catégorie \\\n", "1 CM NaN NaN 0.0 2020-01-15 2020 01 Frais Bancaire \n", "2 CM S NaN 0.0 2020-01-15 2020 01 TF \n", "3 CM M NaN 0.0 2020-01-15 2020 01 TF \n", "4 CM B NaN 0.0 2020-01-15 2020 01 TF \n", "5 CM NaN NaN 0.0 2020-01-15 2020 01 Frais Financier \n", "\n", " Libellé Débit Crédit Impact \n", "1 NaN 14.1 0.0 -14.1 \n", "2 Impots 1570.0 0.0 -1570.0 \n", "3 Impots 1209.0 0.0 -1209.0 \n", "4 Impots 905.0 0.0 -905.0 \n", "5 Prêts CM 1,8M€ 5715.0 0.0 -5715.0 " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 32, "id": "fe760cd7-2cb3-4c40-91c6-92d87bbe48d1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../PLESNA Compta SYSTEM/staging/Banque/2020.csv\n", "../PLESNA Compta SYSTEM/staging/Banque/2021.csv\n", "../PLESNA Compta SYSTEM/staging/Banque/2022.csv\n" ] } ], "source": [ "for year in df[\"Année\"].unique():\n", " df_year = df[df[\"Année\"]==year]\n", " dest = staging_path/ f\"Banque/{year}.csv\"\n", " print(dest)\n", " to_csv(df_year, dest)\n", " staging_files.add(dest)" ] }, { "cell_type": "code", "execution_count": null, "id": "c84472c0-e71d-4fc1-bf92-9093d895dd40", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "bdb5dd69-7722-4e7f-b734-65b608117854", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "41b90e89-9d3b-462c-ab00-6da28b8e16c8", "metadata": {}, "source": [ "## Clean duplicates" ] }, { "cell_type": "code", "execution_count": 33, "id": "8bfa036d-0526-4756-b556-e19401aeac71", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{PosixPath('../PLESNA Compta SYSTEM/staging/Banque/2020.csv'),\n", " PosixPath('../PLESNA Compta SYSTEM/staging/Banque/2021.csv'),\n", " PosixPath('../PLESNA Compta SYSTEM/staging/Banque/2022.csv'),\n", " PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2017.csv'),\n", " PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2018.csv'),\n", " PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2019.csv'),\n", " PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2020.csv'),\n", " PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2021.csv'),\n", " PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2022.csv'),\n", " PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2023.csv')}" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "staging_files" ] }, { "cell_type": "code", "execution_count": 34, "id": "9f0d1d85-d241-43d3-93d8-b3e52fdcaf51", "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "../PLESNA Compta SYSTEM/staging/Banque/2020.csv got 0 duplicated rows\n", "../PLESNA Compta SYSTEM/staging/CRG/2018.csv got 72 duplicated rows\n", "../PLESNA Compta SYSTEM/staging/Banque/2021.csv got 0 duplicated rows\n", "../PLESNA Compta SYSTEM/staging/CRG/2017.csv got 12 duplicated rows\n", "../PLESNA Compta SYSTEM/staging/CRG/2020.csv got 29 duplicated rows\n", "../PLESNA Compta SYSTEM/staging/Banque/2022.csv got 1 duplicated rows\n", "../PLESNA Compta SYSTEM/staging/CRG/2019.csv got 24 duplicated rows\n", "../PLESNA Compta SYSTEM/staging/CRG/2021.csv got 2 duplicated rows\n", "../PLESNA Compta SYSTEM/staging/CRG/2023.csv got 0 duplicated rows\n", "../PLESNA Compta SYSTEM/staging/CRG/2022.csv got 0 duplicated rows\n" ] } ], "source": [ "for file in staging_files:\n", " df = pd.read_csv(file)\n", " print(f\"{file} got {len(df[df.duplicated()])} duplicated rows\")\n", " df = df[~df.duplicated()]\n", " to_csv(df, file)" ] }, { "cell_type": "code", "execution_count": null, "id": "31fd53e4-6915-4087-b0f9-631f3726f5d4", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }