Feat(join): move join to its own lib
This commit is contained in:
parent
d9c5ec9c49
commit
f48c51d69b
30
pdf_oralia/join.py
Normal file
30
pdf_oralia/join.py
Normal file
@ -0,0 +1,30 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def extract_excel_to_dfs(directory, df_names=("charge", "locataire")):
    """Load the Excel files found in *directory*, grouped by table name.

    Files are expected to be named ``<year>_<month>_<immeuble>_<table>.xlsx``.
    Each file is read with the ``lot`` column forced to ``str`` and gets three
    extra columns taken from its name: ``annee``, ``mois`` and ``immeuble``
    (the first three characters of the building part).

    Files whose name does not follow that pattern, or whose table is not
    listed in *df_names*, are skipped with a warning instead of aborting the
    whole run. This matters when previously joined outputs such as
    ``charge.xlsx`` sit in the same directory as the inputs.

    Args:
        directory: Directory scanned (non-recursively) for ``*.xlsx`` files.
        df_names: Names of the tables to collect.

    Returns:
        A dict mapping every name of *df_names* to the list of DataFrames
        read for that table (an empty list when no file matched).
    """
    dfs = {name: [] for name in df_names}

    # Sorted so the order of the collected frames does not depend on the
    # file system's listing order.
    for file in sorted(Path(directory).glob("*.xlsx")):
        parts = file.stem.split("_")
        # Validate the name *before* reading, so unrelated workbooks are
        # never opened.
        if len(parts) != 4 or parts[3] not in dfs:
            logging.warning("%s skipped: unexpected file name", file)
            continue

        year, month, immeuble, table = parts
        df = pd.read_excel(file, dtype={"lot": str}).assign(
            annee=year, mois=month, immeuble=immeuble[:3]
        )
        dfs[table].append(df)

    return dfs
|
||||
|
||||
|
||||
def join_excel(directory, dest, df_names=("charge", "locataire")):
    """Concatenate the per-file tables of *directory* into one file per table.

    The DataFrames returned by ``extract_excel_to_dfs`` are concatenated
    table by table and written to ``<dest>/<table>.xlsx`` without the index.

    Args:
        directory: Directory containing the source ``*.xlsx`` files.
        dest: Directory receiving the joined files; created if missing.
        df_names: Names of the tables to join.

    Returns:
        A dict mapping each table name that was written to the path of its
        output file. Tables without any source data are absent from it.
    """
    dfs = extract_excel_to_dfs(directory, df_names)
    dest_dir = Path(dest)
    destinations = {}

    for tablename, datas in dfs.items():
        # pd.concat raises ValueError on an empty list; a table without
        # input files is reported and skipped instead.
        if not datas:
            logging.warning("no data found for %s, nothing written", tablename)
            continue

        dest_dir.mkdir(parents=True, exist_ok=True)
        destination = dest_dir / f"{tablename}.xlsx"
        pd.concat(datas).to_excel(destination, index=False)
        destinations[tablename] = destination
        logging.info("%s written", destination)

    return destinations
|
@ -6,6 +6,7 @@ import click
|
||||
import pandas as pd
|
||||
|
||||
from .extract import extract_save
|
||||
from .join import join_excel
|
||||
|
||||
logging_config = dict(
|
||||
version=1,
|
||||
@ -66,19 +67,4 @@ def all(src, dest):
|
||||
@click.option("--src", help="Tous les fichiers dans src", default="./")
|
||||
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
||||
def join(src, dest):
|
||||
p = Path(src)
|
||||
dfs = {
|
||||
"charge": [],
|
||||
"locataire": [],
|
||||
}
|
||||
for file in p.glob("*.xlsx"):
|
||||
year, month, immeuble, table = file.stem.split("_")
|
||||
df = pd.read_excel(file, dtype={"lot": str}).assign(
|
||||
annee=year, mois=month, immeuble=immeuble[:3]
|
||||
)
|
||||
dfs[table].append(df)
|
||||
for tablename, datas in dfs.items():
|
||||
df = pd.concat(datas)
|
||||
destination = Path(dest) / f"{tablename}.xlsx"
|
||||
df.to_excel(destination, index=False)
|
||||
logging.info(f"{destination} written")
|
||||
join_excel(src, dest, df_names=["charge", "locataire"])
|
||||
|
Loading…
Reference in New Issue
Block a user