Feat(join): move join to its own lib
This commit is contained in:
parent
d9c5ec9c49
commit
f48c51d69b
30
pdf_oralia/join.py
Normal file
30
pdf_oralia/join.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def extract_excel_to_dfs(directory, df_names=("charge", "locataire")):
    """Load the per-building Excel files of a directory, grouped by table.

    Only files named ``<year>_<month>_<immeuble>_<table>.xlsx`` are read.
    Each file is loaded with the ``lot`` column kept as ``str`` and gains
    three columns taken from its name: ``annee``, ``mois`` and ``immeuble``
    (first three characters of the building part).

    Args:
        directory: Directory whose ``*.xlsx`` files are scanned
            (non-recursive).
        df_names: Table names to collect. A file whose table part is not
            listed here is skipped.

    Returns:
        A dict mapping every name of ``df_names`` to the list of DataFrames
        loaded for that table (an empty list when no file matched).
    """
    dfs = {name: [] for name in df_names}

    # Sorted so that the order of the collected frames does not depend on the
    # listing order of the filesystem.
    for file in sorted(Path(directory).glob("*.xlsx")):
        parts = file.stem.split("_")
        if len(parts) != 4:
            # Typically a previously joined output ("charge.xlsx") living in
            # the same directory as the inputs: it must not break the run.
            logging.warning("%s skipped: unexpected file name", file)
            continue

        year, month, immeuble, table = parts
        if table not in dfs:
            logging.warning("%s skipped: unknown table %r", file, table)
            continue

        df = pd.read_excel(file, dtype={"lot": str}).assign(
            annee=year, mois=month, immeuble=immeuble[:3]
        )
        dfs[table].append(df)

    return dfs
|
||||||
|
|
||||||
|
|
||||||
|
def join_excel(directory, dest, df_names=("charge", "locataire")):
    """Concatenate the Excel files of a directory into one file per table.

    The input files are loaded with ``extract_excel_to_dfs`` and every table
    that has at least one DataFrame is written to ``<dest>/<table>.xlsx``
    without the index column.

    Args:
        directory: Directory containing the input ``*.xlsx`` files.
        dest: Directory receiving the joined files; created when missing.
        df_names: Table names to join.

    Returns:
        A dict mapping each written table name to the path of its output
        file. Tables without any input data are absent from the dict.
    """
    dfs = extract_excel_to_dfs(directory, df_names)
    dest_dir = Path(dest)
    destinations = {}

    for tablename, datas in dfs.items():
        if not datas:
            # pd.concat raises ValueError on an empty list; a table without
            # input is reported instead of aborting the other tables.
            logging.warning("no data for table %s, nothing written", tablename)
            continue

        df = pd.concat(datas)
        dest_dir.mkdir(parents=True, exist_ok=True)
        destination = dest_dir / f"{tablename}.xlsx"
        df.to_excel(destination, index=False)
        destinations[tablename] = destination
        logging.info(f"{destination} written")

    return destinations
|
@ -6,6 +6,7 @@ import click
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from .extract import extract_save
|
from .extract import extract_save
|
||||||
|
from .join import join_excel
|
||||||
|
|
||||||
logging_config = dict(
|
logging_config = dict(
|
||||||
version=1,
|
version=1,
|
||||||
@ -66,19 +67,4 @@ def all(src, dest):
|
|||||||
@click.option("--src", help="Tous les fichiers dans src", default="./")
|
@click.option("--src", help="Tous les fichiers dans src", default="./")
|
||||||
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
||||||
def join(src, dest):
|
def join(src, dest):
|
||||||
p = Path(src)
|
join_excel(src, dest, df_names=["charge", "locataire"])
|
||||||
dfs = {
|
|
||||||
"charge": [],
|
|
||||||
"locataire": [],
|
|
||||||
}
|
|
||||||
for file in p.glob("*.xlsx"):
|
|
||||||
year, month, immeuble, table = file.stem.split("_")
|
|
||||||
df = pd.read_excel(file, dtype={"lot": str}).assign(
|
|
||||||
annee=year, mois=month, immeuble=immeuble[:3]
|
|
||||||
)
|
|
||||||
dfs[table].append(df)
|
|
||||||
for tablename, datas in dfs.items():
|
|
||||||
df = pd.concat(datas)
|
|
||||||
destination = Path(dest) / f"{tablename}.xlsx"
|
|
||||||
df.to_excel(destination, index=False)
|
|
||||||
logging.info(f"{destination} written")
|
|
||||||
|
Loading…
Reference in New Issue
Block a user