# Gold vers DataMart

In [1]:
import pandas as pd
from pathlib import Path

In [2]:
gold_path = Path("../PLESNA Compta SYSTEM/gold")
assert gold_path.exists()
mart_path = Path("../PLESNA Compta SYSTEM/datamart")
assert mart_path.exists()

In [3]:
def to_csv(df, dest):
    if dest.exists():
        df.to_csv(dest, mode="a", header=False, index=False)
    else:
        df.to_csv(dest, index=False)

In [4]:
for f in mart_path.glob("**/*.csv"):
    f.unlink()

## Agrégation de toute la CRG

In [5]:
crg_path = gold_path/"CRG"
assert crg_path.exists()
crg_files = list(crg_path.glob("*.csv"))

In [6]:
dfs = []
for f in crg_files:
    print(f)
    dfs.append(pd.read_csv(f))
df_crg = pd.concat(dfs)

../PLESNA Compta SYSTEM/gold/CRG/2020.csv
../PLESNA Compta SYSTEM/gold/CRG/2018.csv
../PLESNA Compta SYSTEM/gold/CRG/2022.csv
../PLESNA Compta SYSTEM/gold/CRG/2021.csv
../PLESNA Compta SYSTEM/gold/CRG/2023.csv
../PLESNA Compta SYSTEM/gold/CRG/2019.csv
../PLESNA Compta SYSTEM/gold/CRG/2017.csv


## Agrégation de toute la banque

In [7]:
banque_path = gold_path / "Banque"
assert banque_path.exists()
banque_files = list(banque_path.glob("*.csv"))

In [8]:
dfs = []
for f in banque_files:
    print(f)
    dfs.append(pd.read_csv(f))
df_banque = pd.concat(dfs)

../PLESNA Compta SYSTEM/gold/Banque/2020.csv
../PLESNA Compta SYSTEM/gold/Banque/2022.csv
../PLESNA Compta SYSTEM/gold/Banque/2021.csv


## Lots

In [9]:
df_crg.head()

Unnamed: 0,Régie,Immeuble,Porte,Lot,Année,Mois,Catégorie,Fournisseur,Libellé,Débit,Crédit,Impact
0,Imi Gérance,B,9,B09,2020,1,Loyer Charge,,Règl. Loyer 01/2020,0.0,100.48,100.48
1,Imi Gérance,S,5,S05,2020,1,Loyer Charge,,Règl. Prov. Char 01/2020,0.0,191.0,191.0
2,Imi Gérance,S,5,S05,2020,1,Loyer Charge,,Règl. Loyer 01/2020,0.0,745.39,745.39
3,Imi Gérance,S,2,S02,2020,1,Loyer Charge,,Règl. Prov. Char 01/2020,0.0,519.0,519.0
4,Imi Gérance,S,2,S02,2020,1,Loyer Charge,,Règl. Loyer 01 à 03/2020,0.0,3473.79,3473.79


In [10]:
lot_path = mart_path / "Lot"
lot_path.mkdir(exist_ok=True)
for lot in df_crg["Lot"].unique():
    df  = df_crg[df_crg["Lot"] == lot]
    dest = lot_path/f"{lot}.csv"
    print(dest)
    to_csv(df, dest)

../PLESNA Compta SYSTEM/datamart/Lot/B09.csv
../PLESNA Compta SYSTEM/datamart/Lot/S05.csv
../PLESNA Compta SYSTEM/datamart/Lot/S02.csv
../PLESNA Compta SYSTEM/datamart/Lot/S10.csv
../PLESNA Compta SYSTEM/datamart/Lot/M05.csv
../PLESNA Compta SYSTEM/datamart/Lot/B07.csv
../PLESNA Compta SYSTEM/datamart/Lot/B14.csv
../PLESNA Compta SYSTEM/datamart/Lot/B10.csv
../PLESNA Compta SYSTEM/datamart/Lot/B02.csv
../PLESNA Compta SYSTEM/datamart/Lot/M04.csv
../PLESNA Compta SYSTEM/datamart/Lot/S04.csv
../PLESNA Compta SYSTEM/datamart/Lot/S11.csv
../PLESNA Compta SYSTEM/datamart/Lot/S20.csv
../PLESNA Compta SYSTEM/datamart/Lot/S13.csv
../PLESNA Compta SYSTEM/datamart/Lot/S18.csv
../PLESNA Compta SYSTEM/datamart/Lot/S06.csv
../PLESNA Compta SYSTEM/datamart/Lot/S09.csv
../PLESNA Compta SYSTEM/datamart/Lot/S16.csv
../PLESNA Compta SYSTEM/datamart/Lot/S12.csv
../PLESNA Compta SYSTEM/datamart/Lot/S07.csv
../PLESNA Compta SYSTEM/datamart/Lot/S15.csv
../PLESNA Compta SYSTEM/datamart/Lot/S19.csv
../PLESNA 

## PnL

In [11]:
years = set(list(df_crg["Année"].unique()) + list(df_banque["Année"]))
years

{2017, 2018, 2019, 2020, 2021, 2022}

In [48]:
cols = ["Catégorie", "Mois", "Impact"]
pnl_path = mart_path / "PnL"
pnl_path.mkdir(exist_ok=True)
for year in years:
    print(year)
    try:
        df_crg = pd.read_csv(crg_path/f"{year}.csv")[cols]
    except FileNotFoundError:
        df_crg = pd.DataFrame(columns=cols)
    try:
        df_bq = pd.read_csv(banque_path/f"{year}.csv")[cols]
    except FileNotFoundError:
        df_bq = pd.DataFrame(columns=cols)
    df = pd.concat([df_crg, df_bq])
    #pt = pd.pivot_table(df, index="Catégorie", columns="Mois", aggfunc="sum").fillna(0)
    pt = df.groupby(["Catégorie", "Mois"]).agg("sum").unstack().fillna(0)
    pt.columns = [c[1] for c in pt.columns]
    pt.reset_index(["Catégorie"])
    dest = pnl_path / f"{year}.csv" 
    print(dest)
    pt.to_csv(dest)

2017
../PLESNA Compta SYSTEM/datamart/PnL/2017.csv
2018
../PLESNA Compta SYSTEM/datamart/PnL/2018.csv
2019
../PLESNA Compta SYSTEM/datamart/PnL/2019.csv
2020
../PLESNA Compta SYSTEM/datamart/PnL/2020.csv
2021
../PLESNA Compta SYSTEM/datamart/PnL/2021.csv
2022
../PLESNA Compta SYSTEM/datamart/PnL/2022.csv


  df = pd.concat([df_crg, df_bq])
  df = pd.concat([df_crg, df_bq])
  df = pd.concat([df_crg, df_bq])


In [14]:
df.head()

Unnamed: 0,Catégorie,Mois,Impact
0,Hono E/S,1,-80.0
1,Hono E/S,1,-270.24
2,Hono E/S,1,-900.08
3,Hono E/S,1,-80.0
4,Hono E/S,1,-145.5


In [19]:
pt = pd.pivot_table(df, index="Catégorie", columns="Mois", aggfunc="sum").fillna(0)
pt

Unnamed: 0_level_0,Impact,Impact,Impact,Impact,Impact,Impact,Impact,Impact,Impact,Impact,Impact,Impact
Mois,1,2,3,4,5,6,7,8,9,10,11,12
Catégorie,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Archi,-3600.0,-2160.0,-2880.0,-480.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ascenseur,-1723.96,-114.74,-1651.72,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Assurance,-6624.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
CCA,0.0,2622912.98,-2780000.0,-100000.0,-20000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Comptable,0.0,0.0,-1320.0,-1200.0,0.0,-1320.0,0.0,0.0,-1320.0,0.0,0.0,-1320.0
Diagnotics,0.0,-672.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Eau,0.0,-50.52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Elec,-273.74,-316.06,-845.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Entretien,-3184.22,-5196.08,-4516.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Frais Bancaire,-33.36,-33.36,-33.36,-35.8,-33.36,-38.24,-35.8,-50.04,0.0,-35.8,-35.8,-35.8


In [45]:
pt = df.groupby(["Catégorie", "Mois"]).agg("sum").unstack().fillna(0)
pt.columns

MultiIndex([('Impact',  1),
            ('Impact',  2),
            ('Impact',  3),
            ('Impact',  4),
            ('Impact',  5),
            ('Impact',  6),
            ('Impact',  7),
            ('Impact',  8),
            ('Impact',  9),
            ('Impact', 10),
            ('Impact', 11),
            ('Impact', 12)],
           names=[None, 'Mois'])

In [46]:
pt.columns = [c[1] for c in pt.columns]
pt.reset_index(["Catégorie"])

Unnamed: 0,Catégorie,1,2,3,4,5,6,7,8,9,10,11,12
0,Archi,-3600.0,-2160.0,-2880.0,-480.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Ascenseur,-1723.96,-114.74,-1651.72,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Assurance,-6624.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CCA,0.0,2622912.98,-2780000.0,-100000.0,-20000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Comptable,0.0,0.0,-1320.0,-1200.0,0.0,-1320.0,0.0,0.0,-1320.0,0.0,0.0,-1320.0
5,Diagnotics,0.0,-672.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Eau,0.0,-50.52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Elec,-273.74,-316.06,-845.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Entretien,-3184.22,-5196.08,-4516.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Frais Bancaire,-33.36,-33.36,-33.36,-35.8,-33.36,-38.24,-35.8,-50.04,0.0,-35.8,-35.8,-35.8


In [47]:
pt.index

Index(['Archi', 'Ascenseur', 'Assurance', 'CCA', 'Comptable', 'Diagnotics',
       'Eau', 'Elec', 'Entretien', 'Frais Bancaire', 'Frais Financier',
       'Gerant', 'Hono E/S', 'Hono Gestion', 'Loyer Charge', 'Revenue Gérance',
       'Solde Comptable', 'TF', 'Travaux', 'Xfert Tréso'],
      dtype='object', name='Catégorie')