plesna/notebooks/histo2staging.ipynb

1994 lines
60 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "84d2d916-7061-477f-8b97-6dcb924a8306",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a41b710d-f016-4f3c-923b-cc1d5949cf1e",
"metadata": {},
"outputs": [],
"source": [
"staging_path = Path(\"../PLESNA Compta SYSTEM/staging/\")\n",
"staging_columns = [\"Régie\",\"Immeuble\",\"Porte\",\"Lot\",\"Année\",\"Mois\",\"Catégorie\",\"Fournisseur\",\"Libellé\",\"Débit\",\"Crédit\",\"Impact\"]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6992dcd7-b90d-4e57-9211-8a867071f83c",
"metadata": {},
"outputs": [],
"source": [
"staging_files = set()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "04139774-9a6d-472f-80e6-27415d3499b4",
"metadata": {},
"outputs": [],
"source": [
"def to_csv(df, dest):\n",
" if dest.exists():\n",
" df.to_csv(dest, mode=\"a\", header=False, index=False)\n",
" else:\n",
" df.to_csv(dest, index=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "41806d70-cb2d-4f0b-82e8-f647d37471bc",
"metadata": {},
"outputs": [],
"source": [
"for f in staging_path.glob(\"**/*.csv\"):\n",
" f.unlink()"
]
},
{
"cell_type": "markdown",
"id": "c8703ffa-fc39-4618-8998-87717c6c1e2f",
"metadata": {},
"source": [
"# Import history CRG\n",
"\n",
"Le but de cette partie est d'importer les anciens CRG et de les adapter au format actuel."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b4decfa2-3394-40e8-b012-2a9dc354d697",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"raw_path = Path(\"../PLESNA Compta SYSTEM/raw/CRG/\")\n",
"assert raw_path.exists()\n",
"list(raw_path.glob(\"*/**\"))"
]
},
{
"cell_type": "markdown",
"id": "8ec11543-d858-4c85-864a-ccbab830ef67",
"metadata": {},
"source": [
"## Import de `2019 et avant.xlsx`\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c9167f6a-a622-41d7-b2af-a70c7ac08f70",
"metadata": {},
"outputs": [],
"source": [
"file = raw_path/\"2019 et avant.xlsx\"\n",
"assert file.exists()\n",
"df = pd.read_excel(file, sheet_name=\"IMI Gérence\", )\n",
" #parse_dates = [\"Date\"], date_format=\"%Y-%m%d\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "981884df-0b8d-43a1-b140-1fdf2e2c0b25",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Caution', 'Solde Comptable', 'Xfert entre compa immeubles']\n"
]
},
{
"data": {
"text/plain": [
"{'Ascenseur': 'Ascenseur',\n",
" 'Charge Remboursement': 'Loyer Charge',\n",
" 'contrat assurance': 'Assurance',\n",
" 'contrat entretien': 'Entretien',\n",
" 'diagnostics': 'Diagnotics',\n",
" 'divers (plaques…)': 'Travaux',\n",
" 'Elec': 'Elec',\n",
" 'honor location': 'Hono Gestion',\n",
" 'honor EDL': 'Hono E/S',\n",
" 'honor gestion': 'Hono Gestion',\n",
" 'honor location': 'Hono E/S',\n",
" 'honor remise': 'Hono Gestion',\n",
" 'Loyer + Charges': 'Loyer Charge',\n",
" 'Tel': 'Tel',\n",
" 'travaux': 'Travaux'}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat = pd.read_excel(file, sheet_name=\"Catégorie Mapping\")\n",
"cat_drop = list(cat[cat[\"Nouvelles\"]==\"NE PAS IMPORTER\"][\"Anciennes\"])\n",
"print(cat_drop)\n",
"cat_trans = cat[cat[\"Nouvelles\"]!=\"NE PAS IMPORTER\"]\n",
"trans = {}\n",
"for _, (old, new) in cat_trans.iterrows():\n",
" trans[old] = new\n",
"trans"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "369b39ff-ad65-44b9-b7d7-ece13e173059",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Régie</th>\n",
" <th>Immeuble</th>\n",
" <th>Porte</th>\n",
" <th>Date</th>\n",
" <th>Catégorie</th>\n",
" <th>Libellé</th>\n",
" <th>Débit</th>\n",
" <th>Crédit</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>2017-05-31</td>\n",
" <td>Loyer + Charges</td>\n",
" <td>Règl. Loyer 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>720.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>2017-05-31</td>\n",
" <td>Loyer + Charges</td>\n",
" <td>Règl. Prov. Char 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>191.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>1</td>\n",
" <td>2017-06-01</td>\n",
" <td>Loyer + Charges</td>\n",
" <td>Règl. Prov. Char 04 à 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>633.28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>2017-06-01</td>\n",
" <td>Loyer + Charges</td>\n",
" <td>Règl. Loyer 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>576.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>2017-06-01</td>\n",
" <td>Loyer + Charges</td>\n",
" <td>Règl. Prov. Char 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>31.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Régie Immeuble Porte Date Catégorie \\\n",
"0 Imi Gérance S 5 2017-05-31 Loyer + Charges \n",
"1 Imi Gérance S 5 2017-05-31 Loyer + Charges \n",
"2 Imi Gérance S 1 2017-06-01 Loyer + Charges \n",
"3 Imi Gérance S 4 2017-06-01 Loyer + Charges \n",
"4 Imi Gérance S 4 2017-06-01 Loyer + Charges \n",
"\n",
" Libellé Débit Crédit \n",
"0 Règl. Loyer 06/2017 NaN 720.00 \n",
"1 Règl. Prov. Char 06/2017 NaN 191.00 \n",
"2 Règl. Prov. Char 04 à 06/2017 NaN 633.28 \n",
"3 Règl. Loyer 06/2017 NaN 576.00 \n",
"4 Règl. Prov. Char 06/2017 NaN 31.00 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "12358a6b-e563-44be-b6b2-35c5e5d1d35b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Régie object\n",
"Immeuble object\n",
"Porte object\n",
"Date datetime64[ns]\n",
"Catégorie object\n",
"Libellé object\n",
"Débit float64\n",
"Crédit float64\n",
"dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "markdown",
"id": "76a5d178-6f0c-4497-847f-2f318800dbfb",
"metadata": {},
"source": [
"Filter lines"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8361bb7b-c422-4da6-8b3b-e43f50d4a1ec",
"metadata": {},
"outputs": [],
"source": [
"df = df[~df[\"Catégorie\"].isin(cat_drop)]"
]
},
{
"cell_type": "markdown",
"id": "6f95a6ee-6d0b-4417-b2c2-60faa4847e4f",
"metadata": {},
"source": [
"Feature engineering: derive the staging columns (Lot, Année, Mois, Fournisseur, Impact)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "7caa1b95-c321-4f8e-9f9b-c7e867e23921",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Régie</th>\n",
" <th>Immeuble</th>\n",
" <th>Porte</th>\n",
" <th>Date</th>\n",
" <th>Catégorie</th>\n",
" <th>Libellé</th>\n",
" <th>Débit</th>\n",
" <th>Crédit</th>\n",
" <th>Lot</th>\n",
" <th>Année</th>\n",
" <th>Mois</th>\n",
" <th>Fournisseur</th>\n",
" <th>Impact</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>2017-05-31</td>\n",
" <td>Loyer Charge</td>\n",
" <td>Règl. Loyer 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>720.00</td>\n",
" <td>S05</td>\n",
" <td>2017</td>\n",
" <td>05</td>\n",
" <td></td>\n",
" <td>720.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>2017-05-31</td>\n",
" <td>Loyer Charge</td>\n",
" <td>Règl. Prov. Char 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>191.00</td>\n",
" <td>S05</td>\n",
" <td>2017</td>\n",
" <td>05</td>\n",
" <td></td>\n",
" <td>191.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>1</td>\n",
" <td>2017-06-01</td>\n",
" <td>Loyer Charge</td>\n",
" <td>Règl. Prov. Char 04 à 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>633.28</td>\n",
" <td>S01</td>\n",
" <td>2017</td>\n",
" <td>06</td>\n",
" <td></td>\n",
" <td>633.28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>2017-06-01</td>\n",
" <td>Loyer Charge</td>\n",
" <td>Règl. Loyer 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>576.00</td>\n",
" <td>S04</td>\n",
" <td>2017</td>\n",
" <td>06</td>\n",
" <td></td>\n",
" <td>576.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Imi Gérance</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>2017-06-01</td>\n",
" <td>Loyer Charge</td>\n",
" <td>Règl. Prov. Char 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>31.00</td>\n",
" <td>S04</td>\n",
" <td>2017</td>\n",
" <td>06</td>\n",
" <td></td>\n",
" <td>31.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Régie Immeuble Porte Date Catégorie \\\n",
"0 Imi Gérance S 5 2017-05-31 Loyer Charge \n",
"1 Imi Gérance S 5 2017-05-31 Loyer Charge \n",
"2 Imi Gérance S 1 2017-06-01 Loyer Charge \n",
"3 Imi Gérance S 4 2017-06-01 Loyer Charge \n",
"4 Imi Gérance S 4 2017-06-01 Loyer Charge \n",
"\n",
" Libellé Débit Crédit Lot Année Mois Fournisseur \\\n",
"0 Règl. Loyer 06/2017 0.0 720.00 S05 2017 05 \n",
"1 Règl. Prov. Char 06/2017 0.0 191.00 S05 2017 05 \n",
"2 Règl. Prov. Char 04 à 06/2017 0.0 633.28 S01 2017 06 \n",
"3 Règl. Loyer 06/2017 0.0 576.00 S04 2017 06 \n",
"4 Règl. Prov. Char 06/2017 0.0 31.00 S04 2017 06 \n",
"\n",
" Impact \n",
"0 720.00 \n",
"1 191.00 \n",
"2 633.28 \n",
"3 576.00 \n",
"4 31.00 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df.assign(\n",
" Débit = df[\"Débit\"].fillna(0),\n",
" Crédit = df[\"Crédit\"].fillna(0),\n",
" Lot = df[\"Immeuble\"].astype(str)+df[\"Porte\"].astype(\"str\").str.zfill(2),\n",
" Année = df[\"Date\"].astype(str).str.slice(0,4),\n",
" Mois = df[\"Date\"].astype(str).str.slice(5,7),\n",
" Catégorie = df[\"Catégorie\"].replace(trans),\n",
" Fournisseur = \"\",\n",
")\n",
"df = df.assign(\n",
" Impact = df[\"Crédit\"] - df[\"Débit\"],\n",
")\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"id": "b1951ea6-6e49-41b3-9c2f-92328e9d76ed",
"metadata": {},
"source": [
"Verify that all staging columns are present and keep only those"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a9c243b5-6d66-4113-8ddd-2c35f6a8d8d2",
"metadata": {},
"outputs": [],
"source": [
"df = df[staging_columns]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "c90216b7-fa62-46d9-b93e-0b7626d40832",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"../PLESNA Compta SYSTEM/staging/CRG/2017.csv\n",
"../PLESNA Compta SYSTEM/staging/CRG/2018.csv\n",
"../PLESNA Compta SYSTEM/staging/CRG/2019.csv\n",
"../PLESNA Compta SYSTEM/staging/CRG/2020.csv\n"
]
}
],
"source": [
"\n",
"for year in df[\"Année\"].unique():\n",
" df_year = df[df[\"Année\"]==year]\n",
" dest = staging_path/ f\"CRG/{year}.csv\"\n",
" print(dest)\n",
" to_csv(df_year, dest)\n",
" staging_files.add(dest)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "786d00dd-3fc6-4bd6-9a05-bf9db16eb5be",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "c22a8c5f-4fad-4b7e-bcfa-b058cbd42f8f",
"metadata": {},
"source": [
"## Import de `2020 2022.xlsx`"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "ac33c438-f155-450c-b805-48282e5b1f67",
"metadata": {},
"outputs": [],
"source": [
"file = raw_path/\"2020 2022.xlsx\"\n",
"assert file.exists()\n",
"df = pd.read_excel(file, sheet_name=\"DB CRG\", )"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "df0f36e3-ee81-4195-bbb3-f9307294440f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['caution', 'Solde Comptable', 'Xfert entre compa immeubles', 'Xfert Tréso']\n"
]
},
{
"data": {
"text/plain": [
"{'contentieux': 'Contentieux',\n",
" 'contrat ascenseur': 'Ascenseur',\n",
" 'contrat assurance': 'Assurance',\n",
" 'contrat assurance juridique': 'Assurance',\n",
" 'contrat entretien': 'Entretien',\n",
" 'diagnostics': 'Diagnotics',\n",
" 'divers': 'Travaux',\n",
" 'divers (plaques…)': 'Travaux',\n",
" 'eau': 'Eau',\n",
" 'eau ': 'Eau',\n",
" 'électricité': 'Elec',\n",
" 'honor location': 'Hono Gestion',\n",
" 'honor divers': 'Hono Gestion',\n",
" 'honor EDL': 'Hono E/S',\n",
" 'honor edl ': 'Hono E/S',\n",
" 'honor gestion': 'Hono Gestion',\n",
" 'honor location': 'Hono E/S',\n",
" 'honor remise': 'Hono Gestion',\n",
" 'Honoraire Gestion': 'Hono Gestion',\n",
" 'loyer+charge': 'Loyer Charge',\n",
" 'travaux': 'Travaux'}"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cat = pd.read_excel(file, sheet_name=\"Catégories\")\n",
"cat_drop = list(cat[cat[\"Nouvelles\"]==\"NE PAS IMPORTER\"][\"Anciennes\"])\n",
"print(cat_drop)\n",
"cat_trans = cat[cat[\"Nouvelles\"]!=\"NE PAS IMPORTER\"]\n",
"trans = {}\n",
"for _, (old, new) in cat_trans.iterrows():\n",
" trans[old] = new\n",
"trans"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "a177e08d-8ba3-4bbb-ac50-ef52cd047137",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>N°</th>\n",
" <th>Régie</th>\n",
" <th>immeuble</th>\n",
" <th>porte</th>\n",
" <th>Date</th>\n",
" <th>Categorie</th>\n",
" <th>Libellé</th>\n",
" <th>Débit</th>\n",
" <th>Crédit</th>\n",
" <th>Réserve</th>\n",
" <th>Année</th>\n",
" <th>Mois</th>\n",
" <th>Trimestre</th>\n",
" <th>Lot</th>\n",
" <th>Impact</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>2017-05-31</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Loyer 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>720.00</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>S05</td>\n",
" <td>-720.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>2017-05-31</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Prov. Char 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>191.00</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>S05</td>\n",
" <td>-191.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>1</td>\n",
" <td>2017-06-01</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Prov. Char 04 à 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>633.28</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>S01</td>\n",
" <td>-633.28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>2017-06-01</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Loyer 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>576.00</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>S04</td>\n",
" <td>-576.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>2017-06-01</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Prov. Char 06/2017</td>\n",
" <td>NaN</td>\n",
" <td>31.00</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>S04</td>\n",
" <td>-31.00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" N° Régie immeuble porte Date Categorie \\\n",
"0 1.0 IMIGérance S 5 2017-05-31 loyer+charge \n",
"1 2.0 IMIGérance S 5 2017-05-31 loyer+charge \n",
"2 3.0 IMIGérance S 1 2017-06-01 loyer+charge \n",
"3 4.0 IMIGérance S 4 2017-06-01 loyer+charge \n",
"4 5.0 IMIGérance S 4 2017-06-01 loyer+charge \n",
"\n",
" Libellé Débit Crédit Réserve Année Mois \\\n",
"0 Règl. Loyer 06/2017 NaN 720.00 NaN 2017 5 \n",
"1 Règl. Prov. Char 06/2017 NaN 191.00 NaN 2017 5 \n",
"2 Règl. Prov. Char 04 à 06/2017 NaN 633.28 NaN 2017 6 \n",
"3 Règl. Loyer 06/2017 NaN 576.00 NaN 2017 6 \n",
"4 Règl. Prov. Char 06/2017 NaN 31.00 NaN 2017 6 \n",
"\n",
" Trimestre Lot Impact \n",
"0 2 S05 -720.00 \n",
"1 2 S05 -191.00 \n",
"2 2 S01 -633.28 \n",
"3 2 S04 -576.00 \n",
"4 2 S04 -31.00 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "eac9a790-08f0-4e75-b5dd-087496949c71",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>N°</th>\n",
" <th>Régie</th>\n",
" <th>immeuble</th>\n",
" <th>porte</th>\n",
" <th>Date</th>\n",
" <th>Categorie</th>\n",
" <th>Libellé</th>\n",
" <th>Débit</th>\n",
" <th>Crédit</th>\n",
" <th>Réserve</th>\n",
" <th>Année</th>\n",
" <th>Mois</th>\n",
" <th>Trimestre</th>\n",
" <th>Lot</th>\n",
" <th>Impact</th>\n",
" <th>Immeuble</th>\n",
" <th>Porte</th>\n",
" <th>Catégorie</th>\n",
" <th>Fournisseur</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>2017-05-31</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Loyer 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>720.00</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>05</td>\n",
" <td>2</td>\n",
" <td>S05</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>Loyer Charge</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>2017-05-31</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Prov. Char 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>191.00</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>05</td>\n",
" <td>2</td>\n",
" <td>S05</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>5</td>\n",
" <td>Loyer Charge</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>1</td>\n",
" <td>2017-06-01</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Prov. Char 04 à 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>633.28</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>06</td>\n",
" <td>2</td>\n",
" <td>S01</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>1</td>\n",
" <td>Loyer Charge</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>2017-06-01</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Loyer 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>576.00</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>06</td>\n",
" <td>2</td>\n",
" <td>S04</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>Loyer Charge</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5.0</td>\n",
" <td>IMIGérance</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>2017-06-01</td>\n",
" <td>loyer+charge</td>\n",
" <td>Règl. Prov. Char 06/2017</td>\n",
" <td>0.0</td>\n",
" <td>31.00</td>\n",
" <td>NaN</td>\n",
" <td>2017</td>\n",
" <td>06</td>\n",
" <td>2</td>\n",
" <td>S04</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" <td>4</td>\n",
" <td>Loyer Charge</td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" N° Régie immeuble porte Date Categorie \\\n",
"0 1.0 IMIGérance S 5 2017-05-31 loyer+charge \n",
"1 2.0 IMIGérance S 5 2017-05-31 loyer+charge \n",
"2 3.0 IMIGérance S 1 2017-06-01 loyer+charge \n",
"3 4.0 IMIGérance S 4 2017-06-01 loyer+charge \n",
"4 5.0 IMIGérance S 4 2017-06-01 loyer+charge \n",
"\n",
" Libellé Débit Crédit Réserve Année Mois \\\n",
"0 Règl. Loyer 06/2017 0.0 720.00 NaN 2017 05 \n",
"1 Règl. Prov. Char 06/2017 0.0 191.00 NaN 2017 05 \n",
"2 Règl. Prov. Char 04 à 06/2017 0.0 633.28 NaN 2017 06 \n",
"3 Règl. Loyer 06/2017 0.0 576.00 NaN 2017 06 \n",
"4 Règl. Prov. Char 06/2017 0.0 31.00 NaN 2017 06 \n",
"\n",
" Trimestre Lot Impact Immeuble Porte Catégorie Fournisseur \n",
"0 2 S05 NaN S 5 Loyer Charge \n",
"1 2 S05 NaN S 5 Loyer Charge \n",
"2 2 S01 NaN S 1 Loyer Charge \n",
"3 2 S04 NaN S 4 Loyer Charge \n",
"4 2 S04 NaN S 4 Loyer Charge "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df.assign(\n",
" Débit = df[\"Débit\"].fillna(0),\n",
" Immeuble = df[\"immeuble\"],\n",
" Porte = df[\"porte\"],\n",
" Crédit = df[\"Crédit\"].fillna(0),\n",
" Lot = df[\"immeuble\"].astype(str)+df[\"porte\"].astype(\"str\").str.zfill(2),\n",
" Année = df[\"Date\"].astype(str).str.slice(0,4),\n",
" Mois = df[\"Date\"].astype(str).str.slice(5,7),\n",
" Impact = df[\"Crédit\"] - df[\"Débit\"],\n",
" Catégorie = df[\"Categorie\"].replace(trans),\n",
" Fournisseur = \"\",\n",
")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "0b0bd55e-ff4d-49be-9e77-8b47d8c3a5cd",
"metadata": {},
"outputs": [],
"source": [
"df = df[staging_columns]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "49d216c9-ad59-4db3-ba47-eec840ae53d4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"../PLESNA Compta SYSTEM/staging/CRG/2017.csv\n",
"../PLESNA Compta SYSTEM/staging/CRG/2018.csv\n",
"../PLESNA Compta SYSTEM/staging/CRG/2019.csv\n",
"../PLESNA Compta SYSTEM/staging/CRG/2020.csv\n",
"../PLESNA Compta SYSTEM/staging/CRG/2021.csv\n",
"../PLESNA Compta SYSTEM/staging/CRG/2022.csv\n"
]
}
],
"source": [
"\n",
"for year in df[\"Année\"].unique():\n",
" df_year = df[df[\"Année\"]==year]\n",
" dest = staging_path/ f\"CRG/{year}.csv\"\n",
" print(dest)\n",
" to_csv(df_year, dest)\n",
" staging_files.add(dest)"
]
},
{
"cell_type": "markdown",
"id": "84d4bea9-59e6-4ca3-865e-5add5a992913",
"metadata": {},
"source": [
"## Import de `2023`"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "5815f53e-e665-4caa-a510-f2497c7ca16c",
"metadata": {},
"outputs": [],
"source": [
"file = raw_path/\"2023.xlsx\"\n",
"assert file.exists()\n",
"df = pd.read_excel(file, sheet_name=\"DB CRG 2023 ...\", )\n",
"year = 2023"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "1ff99c68-8bcb-4957-a359-a7ebb4dce337",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Régie</th>\n",
" <th>Immeuble</th>\n",
" <th>Porte</th>\n",
" <th>Mois</th>\n",
" <th>Catégorie</th>\n",
" <th>Fournisseur</th>\n",
" <th>Libellé</th>\n",
" <th>Débit</th>\n",
" <th>Crédit</th>\n",
" <th>Lot</th>\n",
" <th>Année</th>\n",
" <th>Impact</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Gelas</td>\n",
" <td>B</td>\n",
" <td>*</td>\n",
" <td>1</td>\n",
" <td>Entretien</td>\n",
" <td>POEZEVARA NETTOYAGE</td>\n",
" <td>FACTURE ENTRETIEN IMMEUBLE PC N° FC61954</td>\n",
" <td>491.17</td>\n",
" <td>0.0</td>\n",
" <td>B0*</td>\n",
" <td>2023</td>\n",
" <td>-491.17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gelas</td>\n",
" <td>B</td>\n",
" <td>*</td>\n",
" <td>1</td>\n",
" <td>Hono Gestion</td>\n",
" <td>Rosier</td>\n",
" <td>Honoraires H.T.</td>\n",
" <td>699.10</td>\n",
" <td>0.0</td>\n",
" <td>B0*</td>\n",
" <td>2023</td>\n",
" <td>-699.10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Gelas</td>\n",
" <td>B</td>\n",
" <td>*</td>\n",
" <td>1</td>\n",
" <td>Hono Gestion</td>\n",
" <td>Rosier</td>\n",
" <td>TVA/Honoraires ( 20.00 % )</td>\n",
" <td>139.82</td>\n",
" <td>0.0</td>\n",
" <td>B0*</td>\n",
" <td>2023</td>\n",
" <td>-139.82</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Gelas</td>\n",
" <td>M</td>\n",
" <td>*</td>\n",
" <td>1</td>\n",
" <td>Entretien</td>\n",
" <td>REMALI BRIL'OR</td>\n",
" <td>PC ENTRETIEN - DECEMBRE 22</td>\n",
" <td>363.77</td>\n",
" <td>0.0</td>\n",
" <td>M0*</td>\n",
" <td>2023</td>\n",
" <td>-363.77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Gelas</td>\n",
" <td>M</td>\n",
" <td>*</td>\n",
" <td>1</td>\n",
" <td>Entretien</td>\n",
" <td>ASTEC</td>\n",
" <td>MARIETTON ENTRETIEN 1T23</td>\n",
" <td>453.79</td>\n",
" <td>0.0</td>\n",
" <td>M0*</td>\n",
" <td>2023</td>\n",
" <td>-453.79</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Régie Immeuble Porte Mois Catégorie Fournisseur \\\n",
"0 Gelas B * 1 Entretien POEZEVARA NETTOYAGE \n",
"1 Gelas B * 1 Hono Gestion Rosier \n",
"2 Gelas B * 1 Hono Gestion Rosier \n",
"3 Gelas M * 1 Entretien REMALI BRIL'OR \n",
"4 Gelas M * 1 Entretien ASTEC \n",
"\n",
" Libellé Débit Crédit Lot Année \\\n",
"0 FACTURE ENTRETIEN IMMEUBLE PC N° FC61954 491.17 0.0 B0* 2023 \n",
"1 Honoraires H.T. 699.10 0.0 B0* 2023 \n",
"2 TVA/Honoraires ( 20.00 % ) 139.82 0.0 B0* 2023 \n",
"3 PC ENTRETIEN - DECEMBRE 22 363.77 0.0 M0* 2023 \n",
"4 MARIETTON ENTRETIEN 1T23 453.79 0.0 M0* 2023 \n",
"\n",
" Impact \n",
"0 -491.17 \n",
"1 -699.10 \n",
"2 -139.82 \n",
"3 -363.77 \n",
"4 -453.79 "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = df.assign(\n",
" Débit = df[\"Débit\"].fillna(0),\n",
" Crédit = df[\"Crédit\"].fillna(0),\n",
" Lot = df[\"Immeuble\"].astype(str)+df[\"Porte\"].astype(\"str\").str.zfill(2),\n",
" Année = year,\n",
")\n",
"df = df.assign(\n",
" Impact = df[\"Crédit\"] - df[\"Débit\"],\n",
")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "47284150-a63d-4427-9c7c-8ed5136df1f1",
"metadata": {},
"outputs": [],
"source": [
"df = df[staging_columns]"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "4627682f-a5a9-4ade-8dee-113a2399f85e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"../PLESNA Compta SYSTEM/staging/CRG/2023.csv\n"
]
}
],
"source": [
"\n",
"dest = staging_path/ f\"CRG/{year}.csv\"\n",
"print(dest)\n",
"to_csv(df_year, dest)\n",
"staging_files.add(dest)"
]
},
{
"cell_type": "markdown",
"id": "1e4bbd3a-f77d-4b16-bdf5-f2532c4227a5",
"metadata": {},
"source": [
"# Import de l'historique de banque"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "7da38abe-5382-4366-9824-09e0dd2a02d6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[PosixPath('../PLESNA Compta SYSTEM/raw/Banque/Histoire depuis 2020.xlsx')]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"banque_path = Path(\"../PLESNA Compta SYSTEM/raw/Banque/\")\n",
"assert raw_path.exists()\n",
"list(banque_path.glob(\"*\"))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "3f6d380a-35f8-4afb-b455-76f07a06993f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Banque', 'Immeuble', 'Porte', 'Lot', 'date', 'Année', 'Mois',\n",
" 'Catégorie', 'Libellé', 'Débit', 'Crédit', 'Impact'],\n",
" dtype='object')"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"schema_banque = staging_path / \"Banque/Schema.xlsx\"\n",
"assert schema_banque.exists()\n",
"banque_columns = pd.read_excel(schema_banque, sheet_name=\"Schema\").columns\n",
"banque_columns"
]
},
{
"cell_type": "markdown",
"id": "0bb2e54f-6968-4f07-8379-22dd973e71d8",
"metadata": {},
"source": [
"## Import de `Histoire ...`"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "a751b591-66c0-4c8c-9d5c-c8a51adb1e69",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>N°</th>\n",
" <th>Immeuble</th>\n",
" <th>Porte</th>\n",
" <th>date</th>\n",
" <th>catégorie</th>\n",
" <th>libellé</th>\n",
" <th>DEBIT</th>\n",
" <th>CREDIT</th>\n",
" <th>BANQUE</th>\n",
" <th>Année</th>\n",
" <th>Mois</th>\n",
" <th>Lot</th>\n",
" <th>Impact</th>\n",
" <th>CM</th>\n",
" <th>SL</th>\n",
" <th>BNP</th>\n",
" <th>Solde</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2020</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2020-01-15</td>\n",
" <td>Frais Bancaire</td>\n",
" <td>NaN</td>\n",
" <td>14.1</td>\n",
" <td>NaN</td>\n",
" <td>CM</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2</td>\n",
" <td>S</td>\n",
" <td>NaN</td>\n",
" <td>2020-01-15</td>\n",
" <td>TF</td>\n",
" <td>Impots</td>\n",
" <td>1570.0</td>\n",
" <td>NaN</td>\n",
" <td>CM</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3</td>\n",
" <td>M</td>\n",
" <td>NaN</td>\n",
" <td>2020-01-15</td>\n",
" <td>TF</td>\n",
" <td>Impots</td>\n",
" <td>1209.0</td>\n",
" <td>NaN</td>\n",
" <td>CM</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4</td>\n",
" <td>B</td>\n",
" <td>NaN</td>\n",
" <td>2020-01-15</td>\n",
" <td>TF</td>\n",
" <td>Impots</td>\n",
" <td>905.0</td>\n",
" <td>NaN</td>\n",
" <td>CM</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>375</th>\n",
" <td>0</td>\n",
" <td>M</td>\n",
" <td>NaN</td>\n",
" <td>2022-12-24</td>\n",
" <td>Revenue Gérance</td>\n",
" <td>VIR SEPA</td>\n",
" <td>NaN</td>\n",
" <td>10118.86</td>\n",
" <td>BNP</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>376</th>\n",
" <td>0</td>\n",
" <td>B</td>\n",
" <td>NaN</td>\n",
" <td>2022-12-24</td>\n",
" <td>Revenue Gérance</td>\n",
" <td>VIR SEPA</td>\n",
" <td>NaN</td>\n",
" <td>10343.82</td>\n",
" <td>BNP</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>377</th>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-12-28</td>\n",
" <td>Frais Financier</td>\n",
" <td>ECHEANCE PRET 01383 60934927</td>\n",
" <td>1250.0</td>\n",
" <td>NaN</td>\n",
" <td>BNP</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>378</th>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-12-30</td>\n",
" <td>Gerant</td>\n",
" <td>TNS Gerant A. Bertrand Aout</td>\n",
" <td>1000.0</td>\n",
" <td>NaN</td>\n",
" <td>BNP</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>379</th>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022-12-30</td>\n",
" <td>Comptable</td>\n",
" <td>NaN</td>\n",
" <td>660.0</td>\n",
" <td>NaN</td>\n",
" <td>BNP</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>380 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" N° Immeuble Porte date catégorie \\\n",
"0 2020 NaN NaN NaT NaN \n",
"1 1 NaN NaN 2020-01-15 Frais Bancaire \n",
"2 2 S NaN 2020-01-15 TF \n",
"3 3 M NaN 2020-01-15 TF \n",
"4 4 B NaN 2020-01-15 TF \n",
".. ... ... ... ... ... \n",
"375 0 M NaN 2022-12-24 Revenue Gérance \n",
"376 0 B NaN 2022-12-24 Revenue Gérance \n",
"377 0 NaN NaN 2022-12-28 Frais Financier \n",
"378 0 NaN NaN 2022-12-30 Gerant \n",
"379 0 NaN NaN 2022-12-30 Comptable \n",
"\n",
" libellé DEBIT CREDIT BANQUE Année Mois Lot \\\n",
"0 NaN NaN NaN NaN NaN NaN NaN \n",
"1 NaN 14.1 NaN CM 0.0 0.0 0.0 \n",
"2 Impots 1570.0 NaN CM 0.0 0.0 0.0 \n",
"3 Impots 1209.0 NaN CM 0.0 0.0 0.0 \n",
"4 Impots 905.0 NaN CM 0.0 0.0 0.0 \n",
".. ... ... ... ... ... ... ... \n",
"375 VIR SEPA NaN 10118.86 BNP 0.0 0.0 0.0 \n",
"376 VIR SEPA NaN 10343.82 BNP 0.0 0.0 0.0 \n",
"377 ECHEANCE PRET 01383 60934927 1250.0 NaN BNP 0.0 0.0 0.0 \n",
"378 TNS Gerant A. Bertrand Aout 1000.0 NaN BNP 0.0 0.0 0.0 \n",
"379 NaN 660.0 NaN BNP 0.0 0.0 0.0 \n",
"\n",
" Impact CM SL BNP Solde \n",
"0 NaN NaN NaN NaN NaN \n",
"1 0.0 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 0.0 \n",
".. ... ... ... ... ... \n",
"375 0.0 0.0 0.0 0.0 0.0 \n",
"376 0.0 0.0 0.0 0.0 0.0 \n",
"377 0.0 0.0 0.0 0.0 0.0 \n",
"378 0.0 0.0 0.0 0.0 0.0 \n",
"379 0.0 0.0 0.0 0.0 0.0 \n",
"\n",
"[380 rows x 17 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"file = banque_path/\"Histoire depuis 2020.xlsx\"\n",
"assert file.exists()\n",
"df = pd.read_excel(file, skiprows=2)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "9cf1be24-0807-431f-9b1e-98b95a2a1689",
"metadata": {},
"outputs": [],
"source": [
"# Bad line clean\n",
"df = df.dropna(subset=[\"Immeuble\", \"Porte\", \"date\"], how=\"all\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "c6cb1ad7-bb9e-4196-beb4-ac096070a090",
"metadata": {},
"outputs": [],
"source": [
"df = df.assign(\n",
" Banque = df[\"BANQUE\"],\n",
" Catégorie = df[\"catégorie\"],\n",
" Libellé = df[\"libellé\"],\n",
" Débit = df[\"DEBIT\"].fillna(0),\n",
" Crédit = df[\"CREDIT\"].fillna(0),\n",
" Année = df[\"date\"].astype(str).str.slice(0,4),\n",
" Mois = df[\"date\"].astype(str).str.slice(5,7),\n",
")\n",
"df = df.assign(\n",
" Impact = df[\"Crédit\"] - df[\"Débit\"],\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "d73cdaf2-72c3-4c90-ba2d-32d7377b5aee",
"metadata": {},
"outputs": [],
"source": [
"df = df[banque_columns]"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "e58c891e-c91a-4832-bb03-43d2e11cc985",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Banque</th>\n",
" <th>Immeuble</th>\n",
" <th>Porte</th>\n",
" <th>Lot</th>\n",
" <th>date</th>\n",
" <th>Année</th>\n",
" <th>Mois</th>\n",
" <th>Catégorie</th>\n",
" <th>Libellé</th>\n",
" <th>Débit</th>\n",
" <th>Crédit</th>\n",
" <th>Impact</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CM</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>2020-01-15</td>\n",
" <td>2020</td>\n",
" <td>01</td>\n",
" <td>Frais Bancaire</td>\n",
" <td>NaN</td>\n",
" <td>14.1</td>\n",
" <td>0.0</td>\n",
" <td>-14.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CM</td>\n",
" <td>S</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>2020-01-15</td>\n",
" <td>2020</td>\n",
" <td>01</td>\n",
" <td>TF</td>\n",
" <td>Impots</td>\n",
" <td>1570.0</td>\n",
" <td>0.0</td>\n",
" <td>-1570.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CM</td>\n",
" <td>M</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>2020-01-15</td>\n",
" <td>2020</td>\n",
" <td>01</td>\n",
" <td>TF</td>\n",
" <td>Impots</td>\n",
" <td>1209.0</td>\n",
" <td>0.0</td>\n",
" <td>-1209.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CM</td>\n",
" <td>B</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>2020-01-15</td>\n",
" <td>2020</td>\n",
" <td>01</td>\n",
" <td>TF</td>\n",
" <td>Impots</td>\n",
" <td>905.0</td>\n",
" <td>0.0</td>\n",
" <td>-905.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>CM</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" <td>2020-01-15</td>\n",
" <td>2020</td>\n",
" <td>01</td>\n",
" <td>Frais Financier</td>\n",
" <td>Prêts CM 1,8M€</td>\n",
" <td>5715.0</td>\n",
" <td>0.0</td>\n",
" <td>-5715.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Banque Immeuble Porte Lot date Année Mois Catégorie \\\n",
"1 CM NaN NaN 0.0 2020-01-15 2020 01 Frais Bancaire \n",
"2 CM S NaN 0.0 2020-01-15 2020 01 TF \n",
"3 CM M NaN 0.0 2020-01-15 2020 01 TF \n",
"4 CM B NaN 0.0 2020-01-15 2020 01 TF \n",
"5 CM NaN NaN 0.0 2020-01-15 2020 01 Frais Financier \n",
"\n",
" Libellé Débit Crédit Impact \n",
"1 NaN 14.1 0.0 -14.1 \n",
"2 Impots 1570.0 0.0 -1570.0 \n",
"3 Impots 1209.0 0.0 -1209.0 \n",
"4 Impots 905.0 0.0 -905.0 \n",
"5 Prêts CM 1,8M€ 5715.0 0.0 -5715.0 "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "fe760cd7-2cb3-4c40-91c6-92d87bbe48d1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"../PLESNA Compta SYSTEM/staging/Banque/2020.csv\n",
"../PLESNA Compta SYSTEM/staging/Banque/2021.csv\n",
"../PLESNA Compta SYSTEM/staging/Banque/2022.csv\n"
]
}
],
"source": [
"for year in df[\"Année\"].unique():\n",
" df_year = df[df[\"Année\"]==year]\n",
" dest = staging_path/ f\"Banque/{year}.csv\"\n",
" print(dest)\n",
" to_csv(df_year, dest)\n",
" staging_files.add(dest)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c84472c0-e71d-4fc1-bf92-9093d895dd40",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdb5dd69-7722-4e7f-b734-65b608117854",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "41b90e89-9d3b-462c-ab00-6da28b8e16c8",
"metadata": {},
"source": [
"## Clean duplicates"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "8bfa036d-0526-4756-b556-e19401aeac71",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{PosixPath('../PLESNA Compta SYSTEM/staging/Banque/2020.csv'),\n",
" PosixPath('../PLESNA Compta SYSTEM/staging/Banque/2021.csv'),\n",
" PosixPath('../PLESNA Compta SYSTEM/staging/Banque/2022.csv'),\n",
" PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2017.csv'),\n",
" PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2018.csv'),\n",
" PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2019.csv'),\n",
" PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2020.csv'),\n",
" PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2021.csv'),\n",
" PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2022.csv'),\n",
" PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2023.csv')}"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"staging_files"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "9f0d1d85-d241-43d3-93d8-b3e52fdcaf51",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"../PLESNA Compta SYSTEM/staging/Banque/2020.csv got 0 duplicated rows\n",
"../PLESNA Compta SYSTEM/staging/CRG/2018.csv got 72 duplicated rows\n",
"../PLESNA Compta SYSTEM/staging/Banque/2021.csv got 0 duplicated rows\n",
"../PLESNA Compta SYSTEM/staging/CRG/2017.csv got 12 duplicated rows\n",
"../PLESNA Compta SYSTEM/staging/CRG/2020.csv got 29 duplicated rows\n",
"../PLESNA Compta SYSTEM/staging/Banque/2022.csv got 1 duplicated rows\n",
"../PLESNA Compta SYSTEM/staging/CRG/2019.csv got 24 duplicated rows\n",
"../PLESNA Compta SYSTEM/staging/CRG/2021.csv got 2 duplicated rows\n",
"../PLESNA Compta SYSTEM/staging/CRG/2023.csv got 0 duplicated rows\n",
"../PLESNA Compta SYSTEM/staging/CRG/2022.csv got 0 duplicated rows\n"
]
}
],
"source": [
"for file in staging_files:\n",
" df = pd.read_csv(file)\n",
" print(f\"{file} got {len(df[df.duplicated()])} duplicated rows\")\n",
" df = df[~df.duplicated()]\n",
" to_csv(df, file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "31fd53e4-6915-4087-b0f9-631f3726f5d4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}