Feat(join): move join to its own lib

This commit is contained in:
Bertrand Benjamin 2023-05-07 07:03:01 +02:00
parent d9c5ec9c49
commit f48c51d69b
2 changed files with 32 additions and 16 deletions

30
pdf_oralia/join.py Normal file
View File

@ -0,0 +1,30 @@
import logging
from pathlib import Path
import pandas as pd
def extract_excel_to_dfs(directory, df_names=("charge", "locataire")):
    """Load the Excel exports found in *directory*, grouped by table name.

    Workbooks are expected to be named
    ``<year>_<month>_<immeuble>_<table>.xlsx``. Each matching workbook is
    read with its ``lot`` column kept as text and receives three extra
    columns taken from the file name: ``annee``, ``mois`` and ``immeuble``
    (the first three characters of the building part).

    Workbooks whose name does not have exactly four ``_``-separated parts,
    or whose table part is not listed in *df_names*, are skipped. This keeps
    previously joined outputs such as ``charge.xlsx`` (which may sit in the
    scanned directory) from aborting the whole run.

    Args:
        directory: Folder scanned, non-recursively, for ``*.xlsx`` files.
        df_names: Names of the tables to collect.

    Returns:
        A dict mapping every name of *df_names* to the list of DataFrames
        read for that table; the list is empty when no file matched.
    """
    dfs = {name: [] for name in df_names}
    # glob() yields files in an unspecified order; sorting makes the order of
    # the collected frames (and of the rows once joined) reproducible.
    for file in sorted(Path(directory).glob("*.xlsx")):
        parts = file.stem.split("_")
        if len(parts) != 4 or parts[3] not in dfs:
            logging.debug("%s skipped: unexpected file name", file)
            continue
        year, month, immeuble, table = parts
        df = pd.read_excel(file, dtype={"lot": str}).assign(
            annee=year, mois=month, immeuble=immeuble[:3]
        )
        dfs[table].append(df)
    return dfs
def join_excel(directory, dest, df_names=("charge", "locataire")):
    """Concatenate the Excel exports of *directory* into one workbook per table.

    The DataFrames collected by ``extract_excel_to_dfs`` are concatenated
    table by table and written to ``<dest>/<table>.xlsx`` without the index
    column.

    Args:
        directory: Folder holding the source ``*.xlsx`` files.
        dest: Folder receiving the joined workbooks.
        df_names: Names of the tables to join.

    Returns:
        A dict mapping each table name that was written to the path of its
        workbook. Tables for which no source data was collected are left out.
    """
    dfs = extract_excel_to_dfs(directory, df_names)
    destinations = {}
    for tablename, datas in dfs.items():
        if not datas:
            # pd.concat raises ValueError on an empty list; a table without
            # any source file must not abort the remaining tables.
            logging.warning("no data for table %s, nothing written", tablename)
            continue
        df = pd.concat(datas)
        destination = Path(dest) / f"{tablename}.xlsx"
        df.to_excel(destination, index=False)
        destinations[tablename] = destination
        logging.info("%s written", destination)
    return destinations

View File

@ -6,6 +6,7 @@ import click
import pandas as pd
from .extract import extract_save
from .join import join_excel
logging_config = dict(
version=1,
@ -66,19 +67,4 @@ def all(src, dest):
# CLI command `join`: gathers the "*.xlsx" files found in --src and produces
# one workbook per table ("charge", "locataire") under --dest.
# NOTE(review): this excerpt comes from a diff rendered without +/- markers or
# indentation; the inline loop below looks like the removed implementation and
# the final join_excel(...) call like its replacement -- confirm against the
# actual file before relying on these comments.
@click.option("--src", help="Tous les fichiers dans src", default="./")
@click.option("--dest", help="Où mettre les fichiers produits", default="")
def join(src, dest):
p = Path(src)
# One list of DataFrames per table name.
dfs = {
"charge": [],
"locataire": [],
}
for file in p.glob("*.xlsx"):
# The file stem must split into exactly four "_"-separated parts,
# otherwise this unpacking raises ValueError.
year, month, immeuble, table = file.stem.split("_")
# "lot" is read as text; the file-name parts become extra columns, with
# only the first three characters of the building part kept.
df = pd.read_excel(file, dtype={"lot": str}).assign(
annee=year, mois=month, immeuble=immeuble[:3]
)
dfs[table].append(df)
# Concatenate each table's frames and write them to <dest>/<table>.xlsx
# without the index column.
for tablename, datas in dfs.items():
df = pd.concat(datas)
destination = Path(dest) / f"{tablename}.xlsx"
df.to_excel(destination, index=False)
logging.info(f"{destination} written")
# Delegates the same work to join_excel, imported from .join.
join_excel(src, dest, df_names=["charge", "locataire"])