From f2bcf6241a8b9a2347764a09d22bfbbfd0feb6ef Mon Sep 17 00:00:00 2001 From: Bertrand Benjamin Date: Thu, 5 Oct 2023 15:10:39 +0200 Subject: [PATCH 1/3] Fix: rebuild join_excel --- pdf_oralia/join.py | 22 ++++++++++++++++++++++ pdf_oralia/scripts.py | 6 +++++- 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 pdf_oralia/join.py diff --git a/pdf_oralia/join.py b/pdf_oralia/join.py new file mode 100644 index 0000000..5cf2688 --- /dev/null +++ b/pdf_oralia/join.py @@ -0,0 +1,22 @@ +import glob + +import pandas as pd + + +def join_excel(src, dest, file_pattern): + """Join every excel file in src respecting file_pattern into one unique file in dest""" + filenames = list_files(src, file_pattern) + dfs = extract_dfs(filenames) + joined_df = pd.concat(dfs) + joined_df.to_excel(dest) + + +def list_files(src, file_glob): + return list(glob.iglob(f"{src}/{file_glob}")) + + +def extract_dfs(filenames): + dfs = [] + for filename in filenames: + dfs.append(pd.read_excel(filename)) + return dfs diff --git a/pdf_oralia/scripts.py b/pdf_oralia/scripts.py index cd486bd..d9e29c1 100644 --- a/pdf_oralia/scripts.py +++ b/pdf_oralia/scripts.py @@ -5,6 +5,7 @@ from pathlib import Path import click from .extract import extract_save +from .join import join_excel logging_config = dict( version=1, @@ -65,4 +66,7 @@ def all(src, dest): @click.option("--src", help="Tous les fichiers dans src", default="./") @click.option("--dest", help="Où mettre les fichiers produits", default="") def join(src, dest): - join_excel(src, dest, df_names=["charge", "locataire"]) + dest_charge = f"{dest}/charge.xlsx" + join_excel(src, dest_charge, "*_charge.xlsx") + dest_locataire = f"{dest}/locataire.xlsx" + join_excel(src, dest_locataire, "*_locataire.xlsx") -- 2.45.2 From 132e37267b168d1bdf48009c475fc4116b50af16 Mon Sep 17 00:00:00 2001 From: Bertrand Benjamin Date: Thu, 5 Oct 2023 15:19:16 +0200 Subject: [PATCH 2/3] Feat: logging and option about overwriting --- pdf_oralia/scripts.py | 25
+++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/pdf_oralia/scripts.py b/pdf_oralia/scripts.py index d9e29c1..ba4be5f 100644 --- a/pdf_oralia/scripts.py +++ b/pdf_oralia/scripts.py @@ -65,8 +65,29 @@ def all(src, dest): @main.command() @click.option("--src", help="Tous les fichiers dans src", default="./") @click.option("--dest", help="Où mettre les fichiers produits", default="") -def join(src, dest): +@click.option( + "--force", + help="Ecraser si le ficher destination existe.", + default=False, + is_flag=True, +) +def join(src, dest, force): + """Join tous les fichiers excel charge (resp locataire) de src dans un seul fichier charge.xlsx dans dist. + + Exemple: + + pdf-oralia join --src --dest + + + """ dest_charge = f"{dest}/charge.xlsx" - join_excel(src, dest_charge, "*_charge.xlsx") + if not force and Path(dest_charge).exists(): + raise ValueError(f"The file {dest_charge} already exists") dest_locataire = f"{dest}/locataire.xlsx" + if not force and Path(dest_locataire).exists(): + raise ValueError(f"The file {dest_locataire} already exists") + + join_excel(src, dest_charge, "*_charge.xlsx") + logging.info(f"Les données charges ont été concaténées dans {dest_charge}") join_excel(src, dest_locataire, "*_locataire.xlsx") + logging.info(f"Les données locataires ont été concaténées dans {dest_locataire}") -- 2.45.2 From 48e75358ac0a161d49bfdcebe16b45f6c75c8244 Mon Sep 17 00:00:00 2001 From: Bertrand Benjamin Date: Thu, 5 Oct 2023 15:22:14 +0200 Subject: [PATCH 3/3] Fix: remove index in excel outputs --- pdf_oralia/join.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pdf_oralia/join.py b/pdf_oralia/join.py index 5cf2688..37c5aea 100644 --- a/pdf_oralia/join.py +++ b/pdf_oralia/join.py @@ -8,7 +8,7 @@ def join_excel(src, dest, file_pattern): filenames = list_files(src, file_pattern) dfs = extract_dfs(filenames) joined_df = pd.concat(dfs) - joined_df.to_excel(dest) + joined_df.to_excel(dest, index=False) def 
list_files(src, file_glob): -- 2.45.2