pdf_auralia/pdf_oralia/scripts.py

83 lines
2.0 KiB
Python
Raw Normal View History

import logging
from logging.config import dictConfig
2022-09-27 12:48:41 +00:00
from pathlib import Path
2022-09-27 13:01:14 +00:00
2022-09-27 12:48:41 +00:00
import click
2022-09-28 14:47:22 +00:00
import pandas as pd
2022-09-27 12:48:41 +00:00
2022-09-27 14:01:09 +00:00
from .extract import extract_save
2022-09-27 12:48:41 +00:00
# Logging setup applied at import time: every record from DEBUG upwards is
# sent to a single stream handler attached to the root logger.
logging_config = {
    "version": 1,
    "formatters": {
        # Left-aligned, padded columns: level name, logger name, message.
        "f": {"format": "%(levelname)-8s %(name)-12s %(message)s"},
    },
    "handlers": {
        "h": {
            "class": "logging.StreamHandler",
            "formatter": "f",
            "level": logging.DEBUG,
        },
    },
    "root": {
        "handlers": ["h"],
        "level": logging.DEBUG,
    },
}
dictConfig(logging_config)
2022-09-27 12:48:41 +00:00
2022-09-27 14:01:09 +00:00
@click.group()
def main():
    """Entry point of the command line interface.

    The `extract` group and the `join` command are attached to it.
    """
    # The docstring above is what click prints as the description in
    # `--help`; the group itself has nothing to execute.
2022-09-27 12:48:41 +00:00
2022-09-27 14:01:09 +00:00
@main.group()
def extract():
    """Commands running the extraction on one PDF file or on a whole folder."""
    # The docstring above is what click prints as the description in
    # `--help`; the group itself has nothing to execute.
2022-09-27 12:48:41 +00:00
2022-09-27 14:01:09 +00:00
@extract.command()
@click.argument("pdf_file", required=True)
@click.option("--dest", help="Où mettre les fichiers produits", default="")
def on(pdf_file, dest):
    """Run the extraction on a single PDF file.

    Produced files go to --dest, or next to PDF_FILE when --dest is omitted.
    """
    if dest:
        # Same guarantee as the `all` command: an explicitly requested
        # destination folder is created when it does not exist yet.
        dest_dir = Path(dest)
        dest_dir.mkdir(parents=True, exist_ok=True)
    else:
        # No destination given: write alongside the input file.
        dest_dir = Path(pdf_file).parent
    extract_save(pdf_file, dest_dir)
2022-09-27 12:48:41 +00:00
2022-09-27 14:01:09 +00:00
@extract.command()
@click.option("--folder", help="Tous les fichiers dans folder", default="./")
@click.option("--dest", help="Où mettre les fichiers produits", default="./")
def all(folder, dest):  # noqa: A001 - the function name is the CLI command name
    """Run the extraction on every PDF file found directly inside --folder.

    Produced files go to --dest, which is created when it does not exist.
    """
    source_dir = Path(folder)

    dest_dir = Path(dest)
    # parents=True so that a nested destination such as "out/2022" works too.
    dest_dir.mkdir(parents=True, exist_ok=True)

    # Select on the file extension only. A substring test on the whole path
    # would also pick up "x.pdf.bak", directories, or every file of a folder
    # whose own name contains ".pdf". Sorting makes the run order stable.
    pdf_files = sorted(
        entry
        for entry in source_dir.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".pdf"
    )
    for pdf_file in pdf_files:
        logging.info("Found %s", pdf_file)
        extract_save(pdf_file, dest_dir)
2022-09-27 12:48:41 +00:00
2022-09-27 14:01:09 +00:00
@main.command()
@click.option("--folder", help="Tous les fichiers dans folder", default="./")
@click.option("--dest", help="Où mettre les fichiers produits", default="")
def join(folder, dest):
    """Merge the xlsx files of --folder into one xlsx file per table.

    Input files must be named <year>_<month>_<building>_<table>.xlsx, where
    <table> is "charge" or "locataire". Any other xlsx file is skipped.
    """
    source_dir = Path(folder)
    dest_dir = Path(dest)
    dest_dir.mkdir(parents=True, exist_ok=True)

    frames_by_table = {
        "charge": [],
        "locataire": [],
    }
    # Sorted so that the row order of the merged files is reproducible.
    for file in sorted(source_dir.glob("*.xlsx")):
        parts = file.stem.split("_")
        # Skip anything that does not follow the naming scheme. This includes
        # the merged "charge.xlsx" / "locataire.xlsx" written by a previous
        # run when --folder and --dest point to the same place.
        if len(parts) != 4 or parts[3] not in frames_by_table:
            logging.warning("Skipping %s: unexpected file name", file)
            continue
        year, month, immeuble, table = parts
        # Tag every row with where it comes from; only the first three
        # characters of the building part are kept.
        df = pd.read_excel(file).assign(
            annee=year, mois=month, immeuble=immeuble[:3]
        )
        frames_by_table[table].append(df)

    for tablename, frames in frames_by_table.items():
        if not frames:
            # pd.concat raises on an empty list; nothing to write for this table.
            logging.warning("No %s file found in %s", tablename, source_dir)
            continue
        destination = dest_dir / f"{tablename}.xlsx"
        pd.concat(frames).to_excel(destination, index=False)
        logging.info("%s written", destination)