Compare commits
21 Commits
ec9cc19be5
...
v0.0.1
Author | SHA1 | Date | |
---|---|---|---|
8b4d5826a5 | |||
6e0ffe9085 | |||
ab2fdb0541 | |||
0fc39ed317 | |||
a6d6681756 | |||
4eecb3a44c | |||
60da623323 | |||
1f1e3e2741 | |||
2b3e935f39 | |||
ef63f22d44 | |||
1020ef9257 | |||
39084ceebd | |||
7de6c8dd9c | |||
da3815eea6 | |||
45d343d810 | |||
806227f202 | |||
7bf0c38883 | |||
b15b059e2a | |||
48e75358ac | |||
132e37267b | |||
f2bcf6241a |
28
.gitea/workflows/publish_tag.yml
Normal file
28
.gitea/workflows/publish_tag.yml
Normal file
@@ -0,0 +1,28 @@
|
||||
# Build the package and publish it to PyPI whenever a "v*" tag is pushed.
name: pdf-auralia-build-and-publish

on:
  push:
    tags:
      - "v*"

jobs:
  build-and-publish:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Build and publish
        run: |
          echo "$GITHUB_REF"
          # Keep only the last path component of the ref (refs/tags/v1.2.3 -> v1.2.3).
          VERSION="${GITHUB_REF##*/}"
          # Double quotes are required here: inside single quotes the shell
          # would not expand the variable and the literal text would be
          # written into pyproject.toml.
          sed -i "s/version = \"[^\"]*\"/version = \"${VERSION}\"/g" pyproject.toml
          curl -sSL https://install.python-poetry.org | python3 -
          export PATH="/root/.local/bin:$PATH"
          poetry --version
          poetry build
          # Read the token from the step environment instead of templating the
          # secret straight into the script text.
          poetry publish --username __token__ --password "$PYPI_TOKEN"
        env:
          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
|
@@ -0,0 +1 @@
|
||||
from .extract import from_pdf
|
||||
|
@@ -38,8 +38,9 @@ def catch_malformed_table(tables):
|
||||
return tables[0]
|
||||
|
||||
|
||||
def from_pdf(pdf):
|
||||
def from_pdf(pdf_file):
|
||||
"""Build dataframes one about charges and another on loc"""
|
||||
pdf = pdfplumber.open(pdf_file)
|
||||
recapitulatif_tables = []
|
||||
loc_tables = []
|
||||
charge_tables = []
|
||||
@@ -90,8 +91,7 @@ def extract_save(pdf_file, dest):
|
||||
xls_charge = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_charge.xlsx"
|
||||
xls_locataire = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_locataire.xlsx"
|
||||
|
||||
pdf = pdfplumber.open(pdf_file)
|
||||
df_charge, df_loc = from_pdf(pdf)
|
||||
df_charge, df_loc = from_pdf(pdf_file)
|
||||
|
||||
df_charge.to_excel(xls_charge, sheet_name="Charges", index=False)
|
||||
logging.info(f"{xls_charge} saved")
|
||||
|
29
pdf_oralia/join.py
Normal file
29
pdf_oralia/join.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import glob
|
||||
import logging
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def join_excel(src, dest, file_pattern):
    """Join every Excel file in ``src`` matching ``file_pattern`` into one file.

    Args:
        src: Directory in which the input files are looked up.
        dest: Path of the Excel file that receives the concatenated rows.
        file_pattern: Glob pattern, relative to ``src``, selecting the inputs.

    Raises:
        ValueError: If no file in ``src`` matches ``file_pattern``.
    """
    filenames = list_files(src, file_pattern)
    if not filenames:
        # pd.concat([]) would fail with an opaque "No objects to concatenate";
        # raise the same exception type with a message naming the actual cause.
        raise ValueError(f"No file matching {file_pattern} found in {src}")
    logging.debug(f"Concatenate {filenames}")
    dfs = extract_dfs(filenames)
    joined_df = pd.concat(dfs)
    logging.debug(f"Writing joined excel to {dest}")
    # index=False: the per-file row indexes are not written to the output.
    joined_df.to_excel(dest, index=False)
    logging.debug(f"with {len(joined_df)} rows")
|
||||
|
||||
|
||||
def list_files(src, file_glob):
    """Return the paths under ``src`` matching ``file_glob``, sorted by name.

    Args:
        src: Directory prefix prepended to the pattern.
        file_glob: Glob pattern evaluated relative to ``src``.

    Returns:
        A sorted list of matching paths (empty when nothing matches).
    """
    # glob yields matches in arbitrary, filesystem-dependent order; sorting
    # makes the result (and anything built from it) reproducible across runs.
    return sorted(glob.iglob(f"{src}/{file_glob}"))
|
||||
|
||||
|
||||
def extract_dfs(filenames):
    """Read each Excel file in ``filenames`` into a dataframe.

    Returns:
        A list with one dataframe per input path, in the order given.
    """

    def _read_one(path):
        # Load a single workbook, logging its path and row count.
        logging.debug(f"Extracting {path}")
        frame = pd.read_excel(path)
        logging.debug(f"Found {len(frame)} rows")
        return frame

    return [_read_one(path) for path in filenames]
|
@@ -5,29 +5,33 @@ from pathlib import Path
|
||||
import click
|
||||
|
||||
from .extract import extract_save
|
||||
|
||||
logging_config = dict(
|
||||
version=1,
|
||||
formatters={"f": {"format": "%(levelname)-8s %(name)-12s %(message)s"}},
|
||||
handlers={
|
||||
"h": {
|
||||
"class": "logging.StreamHandler",
|
||||
"formatter": "f",
|
||||
"level": logging.DEBUG,
|
||||
}
|
||||
},
|
||||
root={
|
||||
"handlers": ["h"],
|
||||
"level": logging.DEBUG,
|
||||
},
|
||||
)
|
||||
|
||||
dictConfig(logging_config)
|
||||
from .join import join_excel
|
||||
|
||||
|
||||
@click.group()
|
||||
def main():
|
||||
pass
|
||||
@click.option("--debug/--no-debug", default=False)
|
||||
def main(debug):
|
||||
if debug:
|
||||
logging_level = logging.DEBUG
|
||||
else:
|
||||
logging_level = logging.INFO
|
||||
logging_config = dict(
|
||||
version=1,
|
||||
formatters={"f": {"format": "%(levelname)-8s %(name)-12s %(message)s"}},
|
||||
handlers={
|
||||
"h": {
|
||||
"class": "logging.StreamHandler",
|
||||
"formatter": "f",
|
||||
"level": logging_level,
|
||||
}
|
||||
},
|
||||
root={
|
||||
"handlers": ["h"],
|
||||
"level": logging_level,
|
||||
},
|
||||
)
|
||||
|
||||
dictConfig(logging_config)
|
||||
|
||||
|
||||
@main.group()
|
||||
@@ -64,5 +68,31 @@ def all(src, dest):
|
||||
@main.command()
|
||||
@click.option("--src", help="Tous les fichiers dans src", default="./")
|
||||
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
||||
def join(src, dest):
|
||||
join_excel(src, dest, df_names=["charge", "locataire"])
|
||||
@click.option(
|
||||
"--force",
|
||||
help="Ecraser si le ficher destination existe.",
|
||||
default=False,
|
||||
is_flag=True,
|
||||
)
|
||||
def join(src, dest, force):
|
||||
"""Join tous les fichiers excel charge (resp locataire) de src dans un seul fichier charge.xlsx dans dist.
|
||||
|
||||
Exemple:
|
||||
|
||||
pdf-oralia join --src <dossier_source> --dest <dossier_destination>
|
||||
|
||||
|
||||
"""
|
||||
dest_charge = f"{dest}/charge.xlsx"
|
||||
if not force and Path(dest_charge).exists():
|
||||
raise ValueError(f"The file {dest_charge} already exists")
|
||||
dest_locataire = f"{dest}/locataire.xlsx"
|
||||
if not force and Path(dest_locataire).exists():
|
||||
raise ValueError(f"The file {dest_locataire} already exists")
|
||||
|
||||
if not Path(src).exists():
|
||||
raise ValueError(f"The source directory ({src}) does not exists.")
|
||||
join_excel(src, dest_charge, "*_charge.xlsx")
|
||||
logging.info(f"Les données charges ont été concaténées dans {dest_charge}")
|
||||
join_excel(src, dest_locataire, "*_locataire.xlsx")
|
||||
logging.info(f"Les données locataires ont été concaténées dans {dest_locataire}")
|
||||
|
2927
poetry.lock
generated
2927
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -13,7 +13,7 @@ pdf-oralia = "pdf_oralia.scripts:main"
|
||||
python = "^3.10"
|
||||
click = "^8.1.3"
|
||||
pdfplumber = "^0.7.4"
|
||||
pandas = "^1.5.0"
|
||||
pandas = "^2.2.3"
|
||||
openpyxl = "^3.0.10"
|
||||
|
||||
|
||||
|
2
renovate.json
Normal file
2
renovate.json
Normal file
@@ -0,0 +1,2 @@
|
||||
{
|
||||
}
|
@@ -1,76 +1,3 @@
|
||||
argon2-cffi==21.3.0
|
||||
argon2-cffi-bindings==21.2.0
|
||||
asttokens==2.0.8
|
||||
attrs==22.1.0
|
||||
backcall==0.2.0
|
||||
beautifulsoup4==4.11.1
|
||||
bleach==5.0.1
|
||||
cffi==1.15.1
|
||||
charset-normalizer==2.1.1
|
||||
cryptography==38.0.1
|
||||
debugpy==1.6.3
|
||||
decorator==5.1.1
|
||||
defusedxml==0.7.1
|
||||
entrypoints==0.4
|
||||
et-xmlfile==1.1.0
|
||||
executing==1.1.0
|
||||
fastjsonschema==2.16.2
|
||||
ipykernel==6.16.0
|
||||
ipython==8.5.0
|
||||
ipython-genutils==0.2.0
|
||||
ipywidgets==8.0.2
|
||||
jedi==0.18.1
|
||||
Jinja2==3.1.2
|
||||
jsonschema==4.16.0
|
||||
jupyter==1.0.0
|
||||
jupyter-console==6.4.4
|
||||
jupyter-core==4.11.1
|
||||
jupyter_client==7.3.5
|
||||
jupyterlab-pygments==0.2.2
|
||||
jupyterlab-widgets==3.0.3
|
||||
lxml==4.9.1
|
||||
MarkupSafe==2.1.1
|
||||
matplotlib-inline==0.1.6
|
||||
mistune==2.0.4
|
||||
nbclient==0.6.8
|
||||
nbconvert==7.0.0
|
||||
nbformat==5.6.1
|
||||
nest-asyncio==1.5.5
|
||||
notebook==6.4.12
|
||||
numpy==1.23.3
|
||||
openpyxl==3.0.10
|
||||
packaging==21.3
|
||||
pandas==1.5.0
|
||||
pandocfilters==1.5.0
|
||||
parso==0.8.3
|
||||
pdfminer.six==20220524
|
||||
pdfplumber==0.7.4
|
||||
pexpect==4.8.0
|
||||
pickleshare==0.7.5
|
||||
Pillow==9.2.0
|
||||
prometheus-client==0.14.1
|
||||
prompt-toolkit==3.0.31
|
||||
psutil==5.9.2
|
||||
ptyprocess==0.7.0
|
||||
pure-eval==0.2.2
|
||||
pycparser==2.21
|
||||
Pygments==2.13.0
|
||||
pyparsing==3.0.9
|
||||
pyrsistent==0.18.1
|
||||
python-dateutil==2.8.2
|
||||
pytz==2022.2.1
|
||||
pyzmq==24.0.1
|
||||
qtconsole==5.3.2
|
||||
QtPy==2.2.0
|
||||
Send2Trash==1.8.0
|
||||
six==1.16.0
|
||||
soupsieve==2.3.2.post1
|
||||
stack-data==0.5.1
|
||||
terminado==0.15.0
|
||||
tinycss2==1.1.1
|
||||
tornado==6.2
|
||||
traitlets==5.4.0
|
||||
Wand==0.6.10
|
||||
wcwidth==0.2.5
|
||||
webencodings==0.5.1
|
||||
widgetsnbextension==4.0.3
|
||||
pdfplumber
|
||||
numpy
|
||||
pandas
|
||||
|
Reference in New Issue
Block a user