Compare commits
22 Commits
0040dccd9a
...
v0.0.1
Author | SHA1 | Date | |
---|---|---|---|
8b4d5826a5 | |||
6e0ffe9085 | |||
ab2fdb0541 | |||
0fc39ed317 | |||
a6d6681756 | |||
4eecb3a44c | |||
60da623323 | |||
1f1e3e2741 | |||
2b3e935f39 | |||
ef63f22d44 | |||
1020ef9257 | |||
39084ceebd | |||
7de6c8dd9c | |||
da3815eea6 | |||
45d343d810 | |||
806227f202 | |||
7bf0c38883 | |||
b15b059e2a | |||
48e75358ac | |||
132e37267b | |||
f2bcf6241a | |||
ec9cc19be5 |
@@ -27,7 +27,7 @@ steps:
|
|||||||
environment:
|
environment:
|
||||||
MATRIX_ROOMID:
|
MATRIX_ROOMID:
|
||||||
from_secret: MATRIX_ROOMID
|
from_secret: MATRIX_ROOMID
|
||||||
MATRIX_ACCESSTOKEN:
|
MATRIX_ACCESSTOKEN:
|
||||||
from_secret: MATRIX_ACCESSTOKEN
|
from_secret: MATRIX_ACCESSTOKEN
|
||||||
MATRIX_USERID:
|
MATRIX_USERID:
|
||||||
from_secret: MATRIX_USERID
|
from_secret: MATRIX_USERID
|
||||||
@@ -35,11 +35,6 @@ steps:
|
|||||||
homeserver: https://matrix.poneyworld.net
|
homeserver: https://matrix.poneyworld.net
|
||||||
template: "Une nouvelle version (${DRONE_TAG}) de pdf-oralia est publiée!"
|
template: "Une nouvelle version (${DRONE_TAG}) de pdf-oralia est publiée!"
|
||||||
|
|
||||||
when:
|
|
||||||
event:
|
|
||||||
include:
|
|
||||||
- tag
|
|
||||||
|
|
||||||
# Déclencheur de la pipeline
|
# Déclencheur de la pipeline
|
||||||
trigger:
|
trigger:
|
||||||
event:
|
event:
|
||||||
|
28
.gitea/workflows/publish_tag.yml
Normal file
28
.gitea/workflows/publish_tag.yml
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Build the package and publish it to PyPI whenever a "v*" tag is pushed.
name: pdf-auralia-build-and-publish

on:
  push:
    tags:
      - "v*"

jobs:
  build-and-publish:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Build and publish
        run: |
          echo "$GITHUB_REF"
          # The sed expression must be double-quoted: inside single quotes the
          # shell does not expand ${GITHUB_REF##*/}, and the literal text would
          # be written into pyproject.toml instead of the tag name.
          sed -i "s/version = \"[^\"]*\"/version = \"${GITHUB_REF##*/}\"/g" pyproject.toml
          curl -sSL https://install.python-poetry.org | python3 -
          export PATH="/root/.local/bin:$PATH"
          poetry --version
          poetry build
          # Read the token from the step environment (declared below) rather
          # than templating the secret into the script text.
          poetry publish --username __token__ --password "$PYPI_TOKEN"
        env:
          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
|
@@ -0,0 +1 @@
|
|||||||
|
from .extract import from_pdf
|
||||||
|
@@ -38,8 +38,9 @@ def catch_malformed_table(tables):
|
|||||||
return tables[0]
|
return tables[0]
|
||||||
|
|
||||||
|
|
||||||
def from_pdf(pdf):
|
def from_pdf(pdf_file):
|
||||||
"""Build dataframes one about charges and another on loc"""
|
"""Build dataframes one about charges and another on loc"""
|
||||||
|
pdf = pdfplumber.open(pdf_file)
|
||||||
recapitulatif_tables = []
|
recapitulatif_tables = []
|
||||||
loc_tables = []
|
loc_tables = []
|
||||||
charge_tables = []
|
charge_tables = []
|
||||||
@@ -90,8 +91,7 @@ def extract_save(pdf_file, dest):
|
|||||||
xls_charge = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_charge.xlsx"
|
xls_charge = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_charge.xlsx"
|
||||||
xls_locataire = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_locataire.xlsx"
|
xls_locataire = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_locataire.xlsx"
|
||||||
|
|
||||||
pdf = pdfplumber.open(pdf_file)
|
df_charge, df_loc = from_pdf(pdf_file)
|
||||||
df_charge, df_loc = from_pdf(pdf)
|
|
||||||
|
|
||||||
df_charge.to_excel(xls_charge, sheet_name="Charges", index=False)
|
df_charge.to_excel(xls_charge, sheet_name="Charges", index=False)
|
||||||
logging.info(f"{xls_charge} saved")
|
logging.info(f"{xls_charge} saved")
|
||||||
|
29
pdf_oralia/join.py
Normal file
29
pdf_oralia/join.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
import glob
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def join_excel(src, dest, file_pattern):
    """Join every Excel file in ``src`` matching ``file_pattern`` into one file.

    Args:
        src: Directory searched for the input files.
        dest: Path of the Excel file to write.
        file_pattern: Glob pattern (relative to ``src``) selecting the inputs.

    Raises:
        ValueError: If no file in ``src`` matches ``file_pattern``.
    """
    filenames = list_files(src, file_pattern)
    if not filenames:
        # pd.concat rejects an empty list with a generic "No objects to
        # concatenate" ValueError; fail earlier with a message that names
        # the directory and pattern at fault.
        raise ValueError(f"No file matching {file_pattern} found in {src}")
    logging.debug(f"Concatenate {filenames}")
    dfs = extract_dfs(filenames)
    joined_df = pd.concat(dfs)
    logging.debug(f"Writing joined excel to {dest}")
    joined_df.to_excel(dest, index=False)
    logging.debug(f"with {len(joined_df)} rows")
|
||||||
|
|
||||||
|
|
||||||
|
def list_files(src, file_glob):
    """Return the paths in ``src`` matching ``file_glob``, sorted by name.

    Args:
        src: Directory to search.
        file_glob: Glob pattern, interpreted relative to ``src``.

    Returns:
        A list of path strings of the form ``"{src}/<match>"``; empty when
        nothing matches.
    """
    # glob yields matches in arbitrary, filesystem-dependent order; sorting
    # makes the order in which files are later concatenated reproducible.
    return sorted(glob.iglob(f"{src}/{file_glob}"))
|
||||||
|
|
||||||
|
|
||||||
|
def extract_dfs(filenames):
    """Read each Excel file in ``filenames`` and return the dataframes as a list.

    Files are read one at a time with ``pd.read_excel``, in the order given.
    The file name and the number of rows read are logged at debug level.
    """

    def _read(path):
        # Load a single workbook, logging before and after the read.
        logging.debug(f"Extracting {path}")
        frame = pd.read_excel(path)
        logging.debug(f"Found {len(frame)} rows")
        return frame

    return [_read(path) for path in filenames]
|
@@ -5,29 +5,33 @@ from pathlib import Path
|
|||||||
import click
|
import click
|
||||||
|
|
||||||
from .extract import extract_save
|
from .extract import extract_save
|
||||||
|
from .join import join_excel
|
||||||
logging_config = dict(
|
|
||||||
version=1,
|
|
||||||
formatters={"f": {"format": "%(levelname)-8s %(name)-12s %(message)s"}},
|
|
||||||
handlers={
|
|
||||||
"h": {
|
|
||||||
"class": "logging.StreamHandler",
|
|
||||||
"formatter": "f",
|
|
||||||
"level": logging.DEBUG,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
root={
|
|
||||||
"handlers": ["h"],
|
|
||||||
"level": logging.DEBUG,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
dictConfig(logging_config)
|
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
def main():
|
@click.option("--debug/--no-debug", default=False)
|
||||||
pass
|
def main(debug):
    # Entry point of the CLI group: configures the root logger.
    # Documented with comments rather than a docstring because click uses a
    # command's docstring as its --help text.
    #
    # debug: when true, log at DEBUG level; otherwise at INFO level.
    logging_level = logging.DEBUG if debug else logging.INFO

    # One stream handler, with the same threshold as the root logger.
    stream_handler = {
        "class": "logging.StreamHandler",
        "formatter": "f",
        "level": logging_level,
    }

    dictConfig(
        {
            "version": 1,
            "formatters": {
                "f": {"format": "%(levelname)-8s %(name)-12s %(message)s"},
            },
            "handlers": {"h": stream_handler},
            "root": {"handlers": ["h"], "level": logging_level},
        }
    )
|
||||||
|
|
||||||
|
|
||||||
@main.group()
|
@main.group()
|
||||||
@@ -64,5 +68,31 @@ def all(src, dest):
|
|||||||
@main.command()
|
@main.command()
|
||||||
@click.option("--src", help="Tous les fichiers dans src", default="./")
|
@click.option("--src", help="Tous les fichiers dans src", default="./")
|
||||||
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
||||||
def join(src, dest):
|
@click.option(
|
||||||
join_excel(src, dest, df_names=["charge", "locataire"])
|
"--force",
|
||||||
|
help="Ecraser si le ficher destination existe.",
|
||||||
|
default=False,
|
||||||
|
is_flag=True,
|
||||||
|
)
|
||||||
|
def join(src, dest, force):
    """Join tous les fichiers excel charge (resp locataire) de src dans un seul fichier charge.xlsx dans dist.

    Exemple:

    pdf-oralia join --src <dossier_source> --dest <dossier_destination>


    """
    # The docstring above is left as written: click uses a command's
    # docstring as its help text.
    #
    # src:   directory holding the "*_charge.xlsx" / "*_locataire.xlsx" files.
    # dest:  directory receiving charge.xlsx and locataire.xlsx.
    # force: overwrite the destination files when they already exist.
    #
    # Raises ValueError when a destination file exists (and force is false)
    # or when the source directory does not exist.

    # Build the targets with Path so that the default dest ("") resolves to
    # the current directory; f"{dest}/charge.xlsx" would give "/charge.xlsx",
    # i.e. the filesystem root.
    dest_dir = Path(dest)
    dest_charge = dest_dir / "charge.xlsx"
    if not force and dest_charge.exists():
        raise ValueError(f"The file {dest_charge} already exists")
    dest_locataire = dest_dir / "locataire.xlsx"
    if not force and dest_locataire.exists():
        raise ValueError(f"The file {dest_locataire} already exists")

    if not Path(src).exists():
        raise ValueError(f"The source directory ({src}) does not exist.")
    join_excel(src, dest_charge, "*_charge.xlsx")
    logging.info(f"Les données charges ont été concaténées dans {dest_charge}")
    join_excel(src, dest_locataire, "*_locataire.xlsx")
    logging.info(f"Les données locataires ont été concaténées dans {dest_locataire}")
|
||||||
|
2927
poetry.lock
generated
2927
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "pdf-oralia"
|
name = "pdf-oralia"
|
||||||
version = "dev"
|
version = "0"
|
||||||
description = ""
|
description = ""
|
||||||
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
|
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
@@ -13,7 +13,7 @@ pdf-oralia = "pdf_oralia.scripts:main"
|
|||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
click = "^8.1.3"
|
click = "^8.1.3"
|
||||||
pdfplumber = "^0.7.4"
|
pdfplumber = "^0.7.4"
|
||||||
pandas = "^1.5.0"
|
pandas = "^2.2.3"
|
||||||
openpyxl = "^3.0.10"
|
openpyxl = "^3.0.10"
|
||||||
|
|
||||||
|
|
||||||
|
2
renovate.json
Normal file
2
renovate.json
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
{
|
||||||
|
}
|
@@ -1,76 +1,3 @@
|
|||||||
argon2-cffi==21.3.0
|
pdfplumber
|
||||||
argon2-cffi-bindings==21.2.0
|
numpy
|
||||||
asttokens==2.0.8
|
pandas
|
||||||
attrs==22.1.0
|
|
||||||
backcall==0.2.0
|
|
||||||
beautifulsoup4==4.11.1
|
|
||||||
bleach==5.0.1
|
|
||||||
cffi==1.15.1
|
|
||||||
charset-normalizer==2.1.1
|
|
||||||
cryptography==38.0.1
|
|
||||||
debugpy==1.6.3
|
|
||||||
decorator==5.1.1
|
|
||||||
defusedxml==0.7.1
|
|
||||||
entrypoints==0.4
|
|
||||||
et-xmlfile==1.1.0
|
|
||||||
executing==1.1.0
|
|
||||||
fastjsonschema==2.16.2
|
|
||||||
ipykernel==6.16.0
|
|
||||||
ipython==8.5.0
|
|
||||||
ipython-genutils==0.2.0
|
|
||||||
ipywidgets==8.0.2
|
|
||||||
jedi==0.18.1
|
|
||||||
Jinja2==3.1.2
|
|
||||||
jsonschema==4.16.0
|
|
||||||
jupyter==1.0.0
|
|
||||||
jupyter-console==6.4.4
|
|
||||||
jupyter-core==4.11.1
|
|
||||||
jupyter_client==7.3.5
|
|
||||||
jupyterlab-pygments==0.2.2
|
|
||||||
jupyterlab-widgets==3.0.3
|
|
||||||
lxml==4.9.1
|
|
||||||
MarkupSafe==2.1.1
|
|
||||||
matplotlib-inline==0.1.6
|
|
||||||
mistune==2.0.4
|
|
||||||
nbclient==0.6.8
|
|
||||||
nbconvert==7.0.0
|
|
||||||
nbformat==5.6.1
|
|
||||||
nest-asyncio==1.5.5
|
|
||||||
notebook==6.4.12
|
|
||||||
numpy==1.23.3
|
|
||||||
openpyxl==3.0.10
|
|
||||||
packaging==21.3
|
|
||||||
pandas==1.5.0
|
|
||||||
pandocfilters==1.5.0
|
|
||||||
parso==0.8.3
|
|
||||||
pdfminer.six==20220524
|
|
||||||
pdfplumber==0.7.4
|
|
||||||
pexpect==4.8.0
|
|
||||||
pickleshare==0.7.5
|
|
||||||
Pillow==9.2.0
|
|
||||||
prometheus-client==0.14.1
|
|
||||||
prompt-toolkit==3.0.31
|
|
||||||
psutil==5.9.2
|
|
||||||
ptyprocess==0.7.0
|
|
||||||
pure-eval==0.2.2
|
|
||||||
pycparser==2.21
|
|
||||||
Pygments==2.13.0
|
|
||||||
pyparsing==3.0.9
|
|
||||||
pyrsistent==0.18.1
|
|
||||||
python-dateutil==2.8.2
|
|
||||||
pytz==2022.2.1
|
|
||||||
pyzmq==24.0.1
|
|
||||||
qtconsole==5.3.2
|
|
||||||
QtPy==2.2.0
|
|
||||||
Send2Trash==1.8.0
|
|
||||||
six==1.16.0
|
|
||||||
soupsieve==2.3.2.post1
|
|
||||||
stack-data==0.5.1
|
|
||||||
terminado==0.15.0
|
|
||||||
tinycss2==1.1.1
|
|
||||||
tornado==6.2
|
|
||||||
traitlets==5.4.0
|
|
||||||
Wand==0.6.10
|
|
||||||
wcwidth==0.2.5
|
|
||||||
webencodings==0.5.1
|
|
||||||
widgetsnbextension==4.0.3
|
|
||||||
|
Reference in New Issue
Block a user