15 Commits

6 changed files with 88 additions and 32 deletions

View File

@@ -35,11 +35,6 @@ steps:
homeserver: https://matrix.poneyworld.net
template: "Une nouvelle version (${DRONE_TAG}) de pdf-oralia est publiée!"
when:
event:
include:
- tag
# Déclencheur de la pipeline
trigger:
event:

29
pdf_oralia/join.py Normal file
View File

@@ -0,0 +1,29 @@
import glob
import logging
import pandas as pd
def join_excel(src, dest, file_pattern):
    """Join every Excel file in ``src`` matching ``file_pattern`` into one file.

    Args:
        src: Directory searched for the input Excel files.
        dest: Path of the Excel file that receives the concatenated rows.
        file_pattern: Glob pattern, relative to ``src``, selecting the inputs.

    Raises:
        ValueError: If no file in ``src`` matches ``file_pattern``.
    """
    filenames = list_files(src, file_pattern)
    logging.debug(f"Concatenate {filenames}")
    if not filenames:
        # pd.concat([]) would raise the opaque "No objects to concatenate";
        # fail with the same exception type but say what was searched.
        raise ValueError(f"No file matching {file_pattern!r} found in {src!r}")

    dfs = extract_dfs(filenames)
    joined_df = pd.concat(dfs)

    logging.debug(f"Writing joined excel to {dest}")
    # index=False: the per-file row index is not written to the output.
    joined_df.to_excel(dest, index=False)
    logging.debug(f"with {len(joined_df)} rows")
def list_files(src, file_glob):
    """Return the sorted paths under ``src`` whose name matches ``file_glob``.

    Args:
        src: Directory to search; taken literally, never as a pattern.
        file_glob: Glob pattern applied inside ``src``.

    Returns:
        A list of matching paths as strings, in lexicographic order.
    """
    # Escape src so glob metacharacters in the directory name ("[", "*", "?")
    # are matched literally instead of silently yielding no result.
    pattern = f"{glob.escape(str(src))}/{file_glob}"
    # Sort because glob order is filesystem-dependent; callers concatenate the
    # files in this order, so it must be reproducible.
    return sorted(glob.glob(pattern))
def extract_dfs(filenames):
    """Load each Excel file of ``filenames`` into a DataFrame.

    Returns the DataFrames as a list, in the same order as ``filenames``.
    """

    def _load(path):
        # Read one workbook and log its row count.
        logging.debug(f"Extracting {path}")
        frame = pd.read_excel(path)
        logging.debug(f"Found {len(frame)} rows")
        return frame

    return [_load(path) for path in filenames]

View File

@@ -5,7 +5,16 @@ from pathlib import Path
import click
from .extract import extract_save
from .join import join_excel
@click.group()
@click.option("--debug/--no-debug", default=False)
def main(debug):
if debug:
logging_level = logging.DEBUG
else:
logging_level = logging.INFO
logging_config = dict(
version=1,
formatters={"f": {"format": "%(levelname)-8s %(name)-12s %(message)s"}},
@@ -13,23 +22,18 @@ logging_config = dict(
"h": {
"class": "logging.StreamHandler",
"formatter": "f",
"level": logging.DEBUG,
"level": logging_level,
}
},
root={
"handlers": ["h"],
"level": logging.DEBUG,
"level": logging_level,
},
)
dictConfig(logging_config)
@click.group()
def main():
pass
# Declares the `extract` command group under `main`; the callback itself
# performs no work.
@main.group()
def extract():
pass
@@ -64,5 +68,31 @@ def all(src, dest):
@main.command()
@click.option("--src", help="Tous les fichiers dans src", default="./")
@click.option("--dest", help="Où mettre les fichiers produits", default="")
def join(src, dest):
join_excel(src, dest, df_names=["charge", "locataire"])
@click.option(
"--force",
help="Ecraser si le ficher destination existe.",
default=False,
is_flag=True,
)
def join(src, dest, force):
    """Join tous les fichiers excel charge (resp locataire) de src dans un seul fichier charge.xlsx dans dist.

    Exemple:

        pdf-oralia join --src <dossier_source> --dest <dossier_destination>
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is user-facing output and kept in the CLI's language.
    #
    # Refuses to overwrite an existing output unless `force` is set, checks
    # that `src` exists, then writes charge.xlsx and locataire.xlsx in `dest`.
    # Raises ValueError when an output already exists or `src` is missing.

    # Build the outputs with pathlib: with the default --dest "" the former
    # f"{dest}/charge.xlsx" resolved to "/charge.xlsx" at the filesystem root
    # instead of the current directory.
    dest_dir = Path(dest)

    dest_charge = str(dest_dir / "charge.xlsx")
    if not force and Path(dest_charge).exists():
        raise ValueError(f"The file {dest_charge} already exists")

    dest_locataire = str(dest_dir / "locataire.xlsx")
    if not force and Path(dest_locataire).exists():
        raise ValueError(f"The file {dest_locataire} already exists")

    if not Path(src).exists():
        raise ValueError(f"The source directory ({src}) does not exists.")

    join_excel(src, dest_charge, "*_charge.xlsx")
    logging.info(f"Les données charges ont été concaténées dans {dest_charge}")

    join_excel(src, dest_locataire, "*_locataire.xlsx")
    logging.info(f"Les données locataires ont été concaténées dans {dest_locataire}")

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "pdf-oralia"
version = "dev"
version = "0"
description = ""
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
readme = "README.md"

2
renovate.json Normal file
View File

@@ -0,0 +1,2 @@
{
}

View File

@@ -29,7 +29,7 @@ jupyter_client==7.3.5
jupyterlab-pygments==0.2.2
jupyterlab-widgets==3.0.3
lxml==4.9.1
MarkupSafe==2.1.1
MarkupSafe==2.1.3
matplotlib-inline==0.1.6
mistune==2.0.4
nbclient==0.6.8
@@ -62,7 +62,7 @@ pytz==2022.2.1
pyzmq==24.0.1
qtconsole==5.3.2
QtPy==2.2.0
Send2Trash==1.8.0
Send2Trash==1.8.2
six==1.16.0
soupsieve==2.3.2.post1
stack-data==0.5.1
@@ -70,7 +70,7 @@ terminado==0.15.0
tinycss2==1.1.1
tornado==6.2
traitlets==5.4.0
Wand==0.6.10
Wand==0.6.13
wcwidth==0.2.5
webencodings==0.5.1
widgetsnbextension==4.0.3