diff --git a/pdf_oralia/join.py b/pdf_oralia/join.py
new file mode 100644
index 0000000..5cf2688
--- /dev/null
+++ b/pdf_oralia/join.py
@@ -0,0 +1,34 @@
+import glob
+
+import pandas as pd
+
+
+def join_excel(src, dest, file_pattern):
+    """Join every Excel file in src matching file_pattern into one file at dest.
+
+    Args:
+        src: directory searched for the input workbooks.
+        dest: path of the workbook to write.
+        file_pattern: glob pattern, relative to src, selecting the inputs.
+
+    Raises:
+        ValueError: if no file in src matches file_pattern.
+    """
+    filenames = list_files(src, file_pattern)
+    if not filenames:
+        # pd.concat([]) would fail with an opaque "No objects to concatenate".
+        raise ValueError(f"No file matching {file_pattern!r} found in {src!r}")
+    joined_df = pd.concat(extract_dfs(filenames))
+    joined_df.to_excel(dest)
+
+
+def list_files(src, file_glob):
+    """Return the paths under src that match file_glob, sorted by name."""
+    # glob yields entries in arbitrary, OS-dependent order; sorting keeps the
+    # row order of the joined workbook reproducible between runs.
+    return sorted(glob.iglob(f"{src}/{file_glob}"))
+
+
+def extract_dfs(filenames):
+    """Read each Excel file in filenames into a DataFrame, preserving order."""
+    return [pd.read_excel(filename) for filename in filenames]
diff --git a/pdf_oralia/scripts.py b/pdf_oralia/scripts.py
index cd486bd..d9e29c1 100644
--- a/pdf_oralia/scripts.py
+++ b/pdf_oralia/scripts.py
@@ -5,6 +5,7 @@ from pathlib import Path
 import click
 
 from .extract import extract_save
+from .join import join_excel
 
 logging_config = dict(
     version=1,
@@ -65,4 +66,9 @@ def all(src, dest):
 @click.option("--src", help="Tous les fichiers dans src", default="./")
 @click.option("--dest", help="Où mettre les fichiers produits", default="")
 def join(src, dest):
-    join_excel(src, dest, df_names=["charge", "locataire"])
+    # Build the output paths with pathlib: with the default dest="" an
+    # f-string "{dest}/charge.xlsx" would point at the filesystem root.
+    dest_charge = Path(dest) / "charge.xlsx"
+    join_excel(src, dest_charge, "*_charge.xlsx")
+    dest_locataire = Path(dest) / "locataire.xlsx"
+    join_excel(src, dest_locataire, "*_locataire.xlsx")