diff --git a/pdf_oralia/join.py b/pdf_oralia/join.py
new file mode 100644
index 0000000..f4d87b8
--- /dev/null
+++ b/pdf_oralia/join.py
@@ -0,0 +1,59 @@
+import logging
+from pathlib import Path
+
+import pandas as pd
+
+
+def extract_excel_to_dfs(directory, df_names=("charge", "locataire")):
+    """Read the Excel files of *directory* and group them by table name.
+
+    Only ``*.xlsx`` files whose stem is ``YEAR_MONTH_IMMEUBLE_TABLE`` with
+    ``TABLE`` listed in *df_names* are read.  Any other workbook, such as a
+    ``charge.xlsx`` written by a previous ``join_excel`` run into the same
+    directory, is skipped with a warning instead of aborting the run.
+
+    Returns a dict mapping every name of *df_names* to the list of
+    DataFrames read for it; each one gets ``annee``, ``mois`` and
+    ``immeuble`` (first three characters only) columns from the file name.
+    """
+    dfs = {name: [] for name in df_names}
+
+    # glob() yields files in arbitrary order; sorting keeps the row order of
+    # the joined tables identical from one run to the next.
+    for file in sorted(Path(directory).glob("*.xlsx")):
+        parts = file.stem.split("_")
+        # Validate the name before opening the workbook so stray files are
+        # never parsed.
+        if len(parts) != 4 or parts[3] not in dfs:
+            logging.warning(f"{file} skipped: unexpected file name")
+            continue
+        year, month, immeuble, table = parts
+        df = pd.read_excel(file, dtype={"lot": str}).assign(
+            annee=year, mois=month, immeuble=immeuble[:3]
+        )
+        dfs[table].append(df)
+
+    return dfs
+
+
+def join_excel(directory, dest, df_names=("charge", "locataire")):
+    """Concatenate the Excel files of *directory* into one file per table.
+
+    Every table of *df_names* with at least one source file is written to
+    ``dest/TABLE.xlsx``.  A table without source file is skipped with a
+    warning, because ``pd.concat`` raises ``ValueError`` on an empty list.
+
+    Returns a dict mapping each written table name to its destination path.
+    """
+    dfs = extract_excel_to_dfs(directory, df_names)
+    destinations = {}
+    for tablename, datas in dfs.items():
+        if not datas:
+            logging.warning(f"no file for {tablename}: nothing written")
+            continue
+        df = pd.concat(datas)
+        destination = Path(dest) / f"{tablename}.xlsx"
+        df.to_excel(destination, index=False)
+        destinations[tablename] = destination
+        logging.info(f"{destination} written")
+    return destinations
diff --git a/pdf_oralia/scripts.py b/pdf_oralia/scripts.py
index 54bb58c..856d231 100644
--- a/pdf_oralia/scripts.py
+++ b/pdf_oralia/scripts.py
@@ -6,6 +6,7 @@ import click
 import pandas as pd
 
 from .extract import extract_save
+from .join import join_excel
 
 logging_config = dict(
     version=1,
@@ -66,19 +67,4 @@ def all(src, dest):
 @click.option("--src", help="Tous les fichiers dans src", default="./")
 @click.option("--dest", help="Où mettre les fichiers produits", default="")
 def join(src, dest):
-    p = Path(src)
-    dfs = {
-        "charge": [],
-        "locataire": [],
-    }
-    for file in p.glob("*.xlsx"):
-        year, month, immeuble, table = file.stem.split("_")
-        df = pd.read_excel(file, dtype={"lot": str}).assign(
-            annee=year, mois=month, immeuble=immeuble[:3]
-        )
-        dfs[table].append(df)
-    for tablename, datas in dfs.items():
-        df = pd.concat(datas)
-        destination = Path(dest) / f"{tablename}.xlsx"
-        df.to_excel(destination, index=False)
-        logging.info(f"{destination} written")
+    join_excel(src, dest, df_names=["charge", "locataire"])