In [1]:
import pandas as pd
from pathlib import Path

In [2]:
staging_path = Path("../PLESNA Compta SYSTEM/staging/")
staging_columns = ["Régie","Immeuble","Porte","Lot","Année","Mois","Catégorie","Fournisseur","Libellé","Débit","Crédit","Impact"]

In [3]:
staging_files = set()

In [4]:
def to_csv(df, dest):
    if dest.exists():
        df.to_csv(dest, mode="a", header=False, index=False)
    else:
        df.to_csv(dest, index=False)

In [5]:
for f in staging_path.glob("**/*.csv"):
    f.unlink()

# Import history CRG

Le but de cette partie est d'importer les anciens CRG et de les adapter au format actuel.

In [6]:
raw_path = Path("../PLESNA Compta SYSTEM/raw/CRG/")
assert raw_path.exists()
list(raw_path.glob("*/**"))

[]

## Import de `2019 et avant.xlsx`


In [7]:
file = raw_path/"2019 et avant.xlsx"
assert file.exists()
df = pd.read_excel(file, sheet_name="IMI Gérence", )
                  #parse_dates = ["Date"], date_format="%Y-%m%d")

In [8]:
cat = pd.read_excel(file, sheet_name="Catégorie Mapping")
cat_drop =  list(cat[cat["Nouvelles"]=="NE PAS IMPORTER"]["Anciennes"])
print(cat_drop)
cat_trans =  cat[cat["Nouvelles"]!="NE PAS IMPORTER"]
trans = {}
for _, (old, new) in cat_trans.iterrows():
    trans[old] = new
trans

['Caution', 'Solde Comptable', 'Xfert entre compa immeubles']


{'Ascenseur': 'Ascenseur',
 'Charge Remboursement': 'Loyer Charge',
 'contrat assurance': 'Assurance',
 'contrat entretien': 'Entretien',
 'diagnostics': 'Diagnotics',
 'divers (plaques…)': 'Travaux',
 'Elec': 'Elec',
 'honor  location': 'Hono Gestion',
 'honor EDL': 'Hono E/S',
 'honor gestion': 'Hono Gestion',
 'honor location': 'Hono E/S',
 'honor remise': 'Hono Gestion',
 'Loyer + Charges': 'Loyer Charge',
 'Tel': 'Tel',
 'travaux': 'Travaux'}

In [9]:
df.head()

Unnamed: 0,Régie,Immeuble,Porte,Date,Catégorie,Libellé,Débit,Crédit
0,Imi Gérance,S,5,2017-05-31,Loyer + Charges,Règl. Loyer 06/2017,,720.0
1,Imi Gérance,S,5,2017-05-31,Loyer + Charges,Règl. Prov. Char 06/2017,,191.0
2,Imi Gérance,S,1,2017-06-01,Loyer + Charges,Règl. Prov. Char 04 à 06/2017,,633.28
3,Imi Gérance,S,4,2017-06-01,Loyer + Charges,Règl. Loyer 06/2017,,576.0
4,Imi Gérance,S,4,2017-06-01,Loyer + Charges,Règl. Prov. Char 06/2017,,31.0


In [10]:
df.dtypes

Régie                object
Immeuble             object
Porte                object
Date         datetime64[ns]
Catégorie            object
Libellé              object
Débit               float64
Crédit              float64
dtype: object

Filter lines

In [11]:
df = df[~df["Catégorie"].isin(cat_drop)]

Featuring

In [12]:
df = df.assign(
    Débit = df["Débit"].fillna(0),
    Crédit = df["Crédit"].fillna(0),
    Lot = df["Immeuble"].astype(str)+df["Porte"].astype("str").str.zfill(2),
    Année = df["Date"].astype(str).str.slice(0,4),
    Mois = df["Date"].astype(str).str.slice(5,7),
    Catégorie = df["Catégorie"].replace(trans),
    Fournisseur = "",
)
df = df.assign(
    Impact = df["Crédit"] - df["Débit"],
)
df.head()

Unnamed: 0,Régie,Immeuble,Porte,Date,Catégorie,Libellé,Débit,Crédit,Lot,Année,Mois,Fournisseur,Impact
0,Imi Gérance,S,5,2017-05-31,Loyer Charge,Règl. Loyer 06/2017,0.0,720.0,S05,2017,5,,720.0
1,Imi Gérance,S,5,2017-05-31,Loyer Charge,Règl. Prov. Char 06/2017,0.0,191.0,S05,2017,5,,191.0
2,Imi Gérance,S,1,2017-06-01,Loyer Charge,Règl. Prov. Char 04 à 06/2017,0.0,633.28,S01,2017,6,,633.28
3,Imi Gérance,S,4,2017-06-01,Loyer Charge,Règl. Loyer 06/2017,0.0,576.0,S04,2017,6,,576.0
4,Imi Gérance,S,4,2017-06-01,Loyer Charge,Règl. Prov. Char 06/2017,0.0,31.0,S04,2017,6,,31.0


Verify columns and select thoses

In [13]:
df = df[staging_columns]

In [14]:

for year in df["Année"].unique():
    df_year = df[df["Année"]==year]
    dest = staging_path/ f"CRG/{year}.csv"
    print(dest)
    to_csv(df_year, dest)
    staging_files.add(dest)

../PLESNA Compta SYSTEM/staging/CRG/2017.csv
../PLESNA Compta SYSTEM/staging/CRG/2018.csv
../PLESNA Compta SYSTEM/staging/CRG/2019.csv
../PLESNA Compta SYSTEM/staging/CRG/2020.csv


## Import `2020 2022.xslx`

In [15]:
file = raw_path/"2020 2022.xlsx"
assert file.exists()
df = pd.read_excel(file, sheet_name="DB CRG", )

In [16]:
cat = pd.read_excel(file, sheet_name="Catégories")
cat_drop =  list(cat[cat["Nouvelles"]=="NE PAS IMPORTER"]["Anciennes"])
print(cat_drop)
cat_trans =  cat[cat["Nouvelles"]!="NE PAS IMPORTER"]
trans = {}
for _, (old, new) in cat_trans.iterrows():
    trans[old] = new
trans

['caution', 'Solde Comptable', 'Xfert entre compa immeubles', 'Xfert Tréso']


{'contentieux': 'Contentieux',
 'contrat ascenseur': 'Ascenseur',
 'contrat assurance': 'Assurance',
 'contrat assurance juridique': 'Assurance',
 'contrat entretien': 'Entretien',
 'diagnostics': 'Diagnotics',
 'divers': 'Travaux',
 'divers (plaques…)': 'Travaux',
 'eau': 'Eau',
 'eau ': 'Eau',
 'électricité': 'Elec',
 'honor  location': 'Hono Gestion',
 'honor divers': 'Hono Gestion',
 'honor EDL': 'Hono E/S',
 'honor edl ': 'Hono E/S',
 'honor gestion': 'Hono Gestion',
 'honor location': 'Hono E/S',
 'honor remise': 'Hono Gestion',
 'Honoraire Gestion': 'Hono Gestion',
 'loyer+charge': 'Loyer Charge',
 'travaux': 'Travaux'}

In [17]:
df.head()

Unnamed: 0,N°,Régie,immeuble,porte,Date,Categorie,Libellé,Débit,Crédit,Réserve,Année,Mois,Trimestre,Lot,Impact
0,1.0,IMIGérance,S,5,2017-05-31,loyer+charge,Règl. Loyer 06/2017,,720.0,,2017,5,2,S05,-720.0
1,2.0,IMIGérance,S,5,2017-05-31,loyer+charge,Règl. Prov. Char 06/2017,,191.0,,2017,5,2,S05,-191.0
2,3.0,IMIGérance,S,1,2017-06-01,loyer+charge,Règl. Prov. Char 04 à 06/2017,,633.28,,2017,6,2,S01,-633.28
3,4.0,IMIGérance,S,4,2017-06-01,loyer+charge,Règl. Loyer 06/2017,,576.0,,2017,6,2,S04,-576.0
4,5.0,IMIGérance,S,4,2017-06-01,loyer+charge,Règl. Prov. Char 06/2017,,31.0,,2017,6,2,S04,-31.0


In [18]:
df = df.assign(
    Débit = df["Débit"].fillna(0),
    Immeuble = df["immeuble"],
    Porte = df["porte"],
    Crédit = df["Crédit"].fillna(0),
    Lot = df["immeuble"].astype(str)+df["porte"].astype("str").str.zfill(2),
    Année = df["Date"].astype(str).str.slice(0,4),
    Mois = df["Date"].astype(str).str.slice(5,7),
    Impact = df["Crédit"] - df["Débit"],
    Catégorie = df["Categorie"].replace(trans),
    Fournisseur = "",
)
df.head()

Unnamed: 0,N°,Régie,immeuble,porte,Date,Categorie,Libellé,Débit,Crédit,Réserve,Année,Mois,Trimestre,Lot,Impact,Immeuble,Porte,Catégorie,Fournisseur
0,1.0,IMIGérance,S,5,2017-05-31,loyer+charge,Règl. Loyer 06/2017,0.0,720.0,,2017,5,2,S05,,S,5,Loyer Charge,
1,2.0,IMIGérance,S,5,2017-05-31,loyer+charge,Règl. Prov. Char 06/2017,0.0,191.0,,2017,5,2,S05,,S,5,Loyer Charge,
2,3.0,IMIGérance,S,1,2017-06-01,loyer+charge,Règl. Prov. Char 04 à 06/2017,0.0,633.28,,2017,6,2,S01,,S,1,Loyer Charge,
3,4.0,IMIGérance,S,4,2017-06-01,loyer+charge,Règl. Loyer 06/2017,0.0,576.0,,2017,6,2,S04,,S,4,Loyer Charge,
4,5.0,IMIGérance,S,4,2017-06-01,loyer+charge,Règl. Prov. Char 06/2017,0.0,31.0,,2017,6,2,S04,,S,4,Loyer Charge,


In [19]:
df = df[staging_columns]

In [20]:

for year in df["Année"].unique():
    df_year = df[df["Année"]==year]
    dest = staging_path/ f"CRG/{year}.csv"
    print(dest)
    to_csv(df_year, dest)
    staging_files.add(dest)

../PLESNA Compta SYSTEM/staging/CRG/2017.csv
../PLESNA Compta SYSTEM/staging/CRG/2018.csv
../PLESNA Compta SYSTEM/staging/CRG/2019.csv
../PLESNA Compta SYSTEM/staging/CRG/2020.csv
../PLESNA Compta SYSTEM/staging/CRG/2021.csv
../PLESNA Compta SYSTEM/staging/CRG/2022.csv


## Import de `2023`

In [21]:
file = raw_path/"2023.xlsx"
assert file.exists()
df = pd.read_excel(file, sheet_name="DB CRG 2023 ...", )
year = 2023

In [22]:
df = df.assign(
    Débit = df["Débit"].fillna(0),
    Crédit = df["Crédit"].fillna(0),
    Lot = df["Immeuble"].astype(str)+df["Porte"].astype("str").str.zfill(2),
    Année = year,
)
df = df.assign(
    Impact = df["Crédit"] - df["Débit"],
)
df.head()

Unnamed: 0,Régie,Immeuble,Porte,Mois,Catégorie,Fournisseur,Libellé,Débit,Crédit,Lot,Année,Impact
0,Gelas,B,*,1,Entretien,POEZEVARA NETTOYAGE,FACTURE ENTRETIEN IMMEUBLE PC N° FC61954,491.17,0.0,B0*,2023,-491.17
1,Gelas,B,*,1,Hono Gestion,Rosier,Honoraires H.T.,699.1,0.0,B0*,2023,-699.1
2,Gelas,B,*,1,Hono Gestion,Rosier,TVA/Honoraires ( 20.00 % ),139.82,0.0,B0*,2023,-139.82
3,Gelas,M,*,1,Entretien,REMALI BRIL'OR,PC ENTRETIEN - DECEMBRE 22,363.77,0.0,M0*,2023,-363.77
4,Gelas,M,*,1,Entretien,ASTEC,MARIETTON ENTRETIEN 1T23,453.79,0.0,M0*,2023,-453.79


In [23]:
df = df[staging_columns]

In [24]:

dest = staging_path/ f"CRG/{year}.csv"
print(dest)
to_csv(df_year, dest)
staging_files.add(dest)

../PLESNA Compta SYSTEM/staging/CRG/2023.csv


# Import de l'historique de banque

In [25]:
banque_path = Path("../PLESNA Compta SYSTEM/raw/Banque/")
assert raw_path.exists()
list(banque_path.glob("*"))

[PosixPath('../PLESNA Compta SYSTEM/raw/Banque/Histoire depuis 2020.xlsx')]

In [26]:
schema_banque = staging_path / "Banque/Schema.xlsx"
assert schema_banque.exists()
banque_columns = pd.read_excel(schema_banque, sheet_name="Schema").columns
banque_columns

Index(['Banque', 'Immeuble', 'Porte', 'Lot', 'date', 'Année', 'Mois',
       'Catégorie', 'Libellé', 'Débit', 'Crédit', 'Impact'],
      dtype='object')

## Import de `Histoire ...`

In [27]:
file = banque_path/"Histoire depuis 2020.xlsx"
assert file.exists()
df = pd.read_excel(file, skiprows=2)
df

Unnamed: 0,N°,Immeuble,Porte,date,catégorie,libellé,DEBIT,CREDIT,BANQUE,Année,Mois,Lot,Impact,CM,SL,BNP,Solde
0,2020,,,NaT,,,,,,,,,,,,,
1,1,,,2020-01-15,Frais Bancaire,,14.1,,CM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,S,,2020-01-15,TF,Impots,1570.0,,CM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,M,,2020-01-15,TF,Impots,1209.0,,CM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,B,,2020-01-15,TF,Impots,905.0,,CM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,0,M,,2022-12-24,Revenue Gérance,VIR SEPA,,10118.86,BNP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
376,0,B,,2022-12-24,Revenue Gérance,VIR SEPA,,10343.82,BNP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
377,0,,,2022-12-28,Frais Financier,ECHEANCE PRET 01383 60934927,1250.0,,BNP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
378,0,,,2022-12-30,Gerant,TNS Gerant A. Bertrand Aout,1000.0,,BNP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Bad line clean
df = df.dropna(subset=["Immeuble", "Porte", "date"], how="all")

In [29]:
df = df.assign(
    Banque = df["BANQUE"],
    Catégorie = df["catégorie"],
    Libellé = df["libellé"],
    Débit = df["DEBIT"].fillna(0),
    Crédit = df["CREDIT"].fillna(0),
    Année = df["date"].astype(str).str.slice(0,4),
    Mois = df["date"].astype(str).str.slice(5,7),
)
df = df.assign(
    Impact = df["Crédit"] - df["Débit"],
)


In [30]:
df = df[banque_columns]

In [31]:
df.head()

Unnamed: 0,Banque,Immeuble,Porte,Lot,date,Année,Mois,Catégorie,Libellé,Débit,Crédit,Impact
1,CM,,,0.0,2020-01-15,2020,1,Frais Bancaire,,14.1,0.0,-14.1
2,CM,S,,0.0,2020-01-15,2020,1,TF,Impots,1570.0,0.0,-1570.0
3,CM,M,,0.0,2020-01-15,2020,1,TF,Impots,1209.0,0.0,-1209.0
4,CM,B,,0.0,2020-01-15,2020,1,TF,Impots,905.0,0.0,-905.0
5,CM,,,0.0,2020-01-15,2020,1,Frais Financier,"Prêts CM 1,8M€",5715.0,0.0,-5715.0


In [32]:
for year in df["Année"].unique():
    df_year = df[df["Année"]==year]
    dest = staging_path/ f"Banque/{year}.csv"
    print(dest)
    to_csv(df_year, dest)
    staging_files.add(dest)

../PLESNA Compta SYSTEM/staging/Banque/2020.csv
../PLESNA Compta SYSTEM/staging/Banque/2021.csv
../PLESNA Compta SYSTEM/staging/Banque/2022.csv


## Clean duplicates

In [33]:
staging_files

{PosixPath('../PLESNA Compta SYSTEM/staging/Banque/2020.csv'),
 PosixPath('../PLESNA Compta SYSTEM/staging/Banque/2021.csv'),
 PosixPath('../PLESNA Compta SYSTEM/staging/Banque/2022.csv'),
 PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2017.csv'),
 PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2018.csv'),
 PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2019.csv'),
 PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2020.csv'),
 PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2021.csv'),
 PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2022.csv'),
 PosixPath('../PLESNA Compta SYSTEM/staging/CRG/2023.csv')}

In [34]:
for file in staging_files:
    df = pd.read_csv(file)
    print(f"{file} got {len(df[df.duplicated()])} duplicated rows")
    df = df[~df.duplicated()]
    to_csv(df, file)

../PLESNA Compta SYSTEM/staging/Banque/2020.csv got 0 duplicated rows
../PLESNA Compta SYSTEM/staging/CRG/2018.csv got 72 duplicated rows
../PLESNA Compta SYSTEM/staging/Banque/2021.csv got 0 duplicated rows
../PLESNA Compta SYSTEM/staging/CRG/2017.csv got 12 duplicated rows
../PLESNA Compta SYSTEM/staging/CRG/2020.csv got 29 duplicated rows
../PLESNA Compta SYSTEM/staging/Banque/2022.csv got 1 duplicated rows
../PLESNA Compta SYSTEM/staging/CRG/2019.csv got 24 duplicated rows
../PLESNA Compta SYSTEM/staging/CRG/2021.csv got 2 duplicated rows
../PLESNA Compta SYSTEM/staging/CRG/2023.csv got 0 duplicated rows
../PLESNA Compta SYSTEM/staging/CRG/2022.csv got 0 duplicated rows
