Compare commits
49 Commits
Author | SHA1 | Date | |
---|---|---|---|
f12e5c05a1 | |||
3b9db43f0d | |||
8df9be825f | |||
d0fc473134 | |||
092b925b68 | |||
3c18bd5d81 | |||
4ee78a7e7b | |||
ce8cdc4c1e | |||
6e0ffe9085 | |||
ab2fdb0541 | |||
0fc39ed317 | |||
a6d6681756 | |||
4eecb3a44c | |||
60da623323 | |||
1f1e3e2741 | |||
2b3e935f39 | |||
ef63f22d44 | |||
1020ef9257 | |||
39084ceebd | |||
7de6c8dd9c | |||
da3815eea6 | |||
45d343d810 | |||
806227f202 | |||
7bf0c38883 | |||
b15b059e2a | |||
48e75358ac | |||
132e37267b | |||
f2bcf6241a | |||
ec9cc19be5 | |||
0040dccd9a | |||
b0333cddd8 | |||
406b89fea1 | |||
812d392720 | |||
6b77980e6c | |||
90c2d3689b | |||
f9be31c090 | |||
2761c3ed7b | |||
5692898137 | |||
44d4150910 | |||
223f25130d | |||
1a86b7bc26 | |||
c56241fe4c | |||
ceebfb0a38 | |||
18c8282f63 | |||
020fd41eab | |||
8a55e6e2cc | |||
1afb2a32ab | |||
e1332e5e4e | |||
a8550712fe |
21
.drone.yml
21
.drone.yml
@@ -11,22 +11,29 @@ steps:
|
|||||||
- name: build-and-publish
|
- name: build-and-publish
|
||||||
image: python:3.11
|
image: python:3.11
|
||||||
commands:
|
commands:
|
||||||
- echo "Tag: ${TAG}"
|
- echo ${DRONE_TAG}
|
||||||
- sed -i "s/{{VERSION_PLACEHOLDER}}/${TAG}/g" pyproject.toml
|
- sed -i 's/version = "[^"]*"/version = "${DRONE_TAG}"/g' pyproject.toml
|
||||||
- curl -sSL https://install.python-poetry.org | python3 -
|
- curl -sSL https://install.python-poetry.org | python3 -
|
||||||
- export PATH="/root/.local/bin:$PATH"
|
- export PATH="/root/.local/bin:$PATH"
|
||||||
- poetry --version
|
- poetry --version
|
||||||
- poetry build
|
- poetry build
|
||||||
- poetry publish --username __token__ --password $PYPI_TOKEN
|
- poetry publish --username __token__ --password $PYPI_TOKEN
|
||||||
environment:
|
environment:
|
||||||
TAG: ${DRONE_COMMIT_TAG}
|
|
||||||
PYPI_TOKEN:
|
PYPI_TOKEN:
|
||||||
from_secret: pypi_token
|
from_secret: pypi_token
|
||||||
|
|
||||||
when:
|
- name: Notify on matrix
|
||||||
event:
|
image: plugins/matrix
|
||||||
include:
|
environment:
|
||||||
- tag
|
MATRIX_ROOMID:
|
||||||
|
from_secret: MATRIX_ROOMID
|
||||||
|
MATRIX_ACCESSTOKEN:
|
||||||
|
from_secret: MATRIX_ACCESSTOKEN
|
||||||
|
MATRIX_USERID:
|
||||||
|
from_secret: MATRIX_USERID
|
||||||
|
settings:
|
||||||
|
homeserver: https://matrix.poneyworld.net
|
||||||
|
template: "Une nouvelle version (${DRONE_TAG}) de pdf-oralia est publiée!"
|
||||||
|
|
||||||
# Déclencheur de la pipeline
|
# Déclencheur de la pipeline
|
||||||
trigger:
|
trigger:
|
||||||
|
@@ -1,19 +0,0 @@
|
|||||||
name: Gitea Actions Demo
|
|
||||||
run-name: ${{ gitea.actor }} is testing out Gitea Actions 🚀
|
|
||||||
on: [push]
|
|
||||||
jobs:
|
|
||||||
Hello-Gitea-Actions:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Check out repository code
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
- name: Set up Python 3.10
|
|
||||||
uses: https://github.com/actions/setup-python@v4
|
|
||||||
with:
|
|
||||||
python-version: '3.10.12'
|
|
||||||
- name: Run image
|
|
||||||
uses: https://github.com/abatilo/actions-poetry@v2
|
|
||||||
with:
|
|
||||||
poetry-version: 1.5
|
|
||||||
- name: View poetry --help
|
|
||||||
run: poetry --help
|
|
@@ -1,31 +0,0 @@
|
|||||||
name: Publish to pipy
|
|
||||||
run-name: ${{ gitea.repository }} publish 🚀 to pipy
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
tags:
|
|
||||||
- '*'
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-n-publish:
|
|
||||||
name: Build and publish Python 🐍 distributions 📦 to PyPI
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- name: Check out repository code
|
|
||||||
uses: actions/checkout@v3
|
|
||||||
- name: Set up Python 3.10
|
|
||||||
uses: https://github.com/actions/setup-python@v4
|
|
||||||
with:
|
|
||||||
python-version: '3.11'
|
|
||||||
- name: Run image
|
|
||||||
uses: https://github.com/abatilo/actions-poetry@v2
|
|
||||||
with:
|
|
||||||
poetry-version: 1.5
|
|
||||||
- name: Extract tag name
|
|
||||||
id: tag
|
|
||||||
run: echo ::set-output name=TAG_NAME::$(echo $GITHUB_REF | cut -d / -f 3)
|
|
||||||
- name: Update version in setup.py
|
|
||||||
run: >-
|
|
||||||
sed -i "s/{{VERSION_PLACEHOLDER}}/${{ steps.tag.outputs.TAG_NAME }}/g" pyproject.toml
|
|
||||||
- name: View poetry --help
|
|
||||||
run: poetry --help
|
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
|||||||
|
secrets.env
|
||||||
pdfs/
|
pdfs/
|
||||||
output/
|
output/
|
||||||
|
|
||||||
|
1059
Extract pdf.ipynb
1059
Extract pdf.ipynb
File diff suppressed because one or more lines are too long
23
README.md
23
README.md
@@ -0,0 +1,23 @@
|
|||||||
|
# PDF AURALIA
|
||||||
|
|
||||||
|
Extraction de fichiers de comptabilité en pdf vers xlsx.
|
||||||
|
|
||||||
|
## Utilisation
|
||||||
|
|
||||||
|
- Lancement sur un fichier pdf particulier
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pdf_oralia extract on <pdf_file> --dest <where to put produced files>
|
||||||
|
```
|
||||||
|
|
||||||
|
- Lancement sur tous les fichiers d'un répertoire (récursivement)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pdf_oralia extract all --src <source folder> --dest <destination folder>
|
||||||
|
```
|
||||||
|
|
||||||
|
Cette commande reproduira la structure du dossier source dans le dossier de destination. Seuls les fichiers non existants seront traités. Par défaut, les fichiers déjà produits ne seront pas écrasés.
|
||||||
|
On peut ajouter les options suivantes:
|
||||||
|
|
||||||
|
- `--force`: pour écraser les fichiers déjà traités
|
||||||
|
- `--only-plan`: pour voir quels fichiers pourraient être créés sans le faire.
|
||||||
|
@@ -0,0 +1 @@
|
|||||||
|
from .extract import from_pdf
|
||||||
|
@@ -1,19 +1,22 @@
|
|||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
import pdfplumber
|
import pdfplumber
|
||||||
|
|
||||||
from .extract_charge import extract_charge, extract_remise_com
|
from pdf_oralia.pages import charge, locataire, patrimoine
|
||||||
from .extract_locataire import extract_situation_loc
|
|
||||||
|
|
||||||
charge_table_settings = {
|
extract_table_settings = {
|
||||||
"vertical_strategy": "lines",
|
"vertical_strategy": "lines",
|
||||||
"horizontal_strategy": "text",
|
"horizontal_strategy": "text",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ExtractError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
def extract_date(page_text):
|
def extract_date(page_text):
|
||||||
"""Extract date from a page
|
"""Extract date from a page
|
||||||
|
|
||||||
@@ -27,52 +30,107 @@ def extract_date(page_text):
|
|||||||
return datetime.strptime(words[-1], "%d/%m/%Y")
|
return datetime.strptime(words[-1], "%d/%m/%Y")
|
||||||
|
|
||||||
|
|
||||||
def extract_from_pdf(pdf, charge_dest, location_dest):
|
def extract_building(page_text, buildings=["bloch", "marietton", "servient"]):
|
||||||
"""Build charge_dest and location_dest xlsx file from pdf"""
|
for building in buildings:
|
||||||
loc_tables = []
|
if building in page_text.lower():
|
||||||
charge_table = []
|
return building
|
||||||
|
raise ValueError("Pas d'immeuble trouvé")
|
||||||
|
|
||||||
df_1st_charge = extract_remise_com(
|
|
||||||
pdf.pages[0].extract_table(charge_table_settings)
|
|
||||||
)
|
|
||||||
|
|
||||||
for page in pdf.pages[1:]:
|
def pdf_extract_tables_lines(pdf):
|
||||||
|
loc_sink = locataire.fsm()
|
||||||
|
next(loc_sink)
|
||||||
|
charge_sink = charge.fsm()
|
||||||
|
next(charge_sink)
|
||||||
|
patrimoine_sink = patrimoine.fsm()
|
||||||
|
next(patrimoine_sink)
|
||||||
|
|
||||||
|
for page_number, page in enumerate(pdf.pages):
|
||||||
page_text = page.extract_text()
|
page_text = page.extract_text()
|
||||||
situation_loc_line = [
|
|
||||||
l for l in page_text.split("\n") if "SITUATION DES LOCATAIRES" in l
|
|
||||||
]
|
|
||||||
date = extract_date(page_text)
|
date = extract_date(page_text)
|
||||||
mois = date.strftime("%m")
|
try:
|
||||||
annee = date.strftime("%Y")
|
additionnal_fields = {
|
||||||
if situation_loc_line:
|
"immeuble": extract_building(page_text),
|
||||||
# mois, annee = situation_loc_line[0].split(" ")[-2:]
|
"mois": date.strftime("%m"),
|
||||||
if loc_tables:
|
"annee": date.strftime("%Y"),
|
||||||
loc_tables.append(page.extract_table()[1:])
|
}
|
||||||
else:
|
except ValueError:
|
||||||
loc_tables.append(page.extract_table())
|
logging.warning(
|
||||||
|
f"L'immeuble de la page {page_number+1} non identifiable. Page ignorée."
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
table_type = ""
|
||||||
|
if locataire.is_it(page_text):
|
||||||
|
table_type = "locataire"
|
||||||
|
elif charge.is_it(page_text):
|
||||||
|
table_type = "charge"
|
||||||
|
elif patrimoine.is_it(page_text):
|
||||||
|
table_type = "patrimoine"
|
||||||
|
else:
|
||||||
|
logging.warning(
|
||||||
|
f"Type de la page {page_number+1} non identifiable. Page ignorée."
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
elif "RECAPITULATIF DES OPERATIONS" in page_text:
|
for line in page.extract_table(extract_table_settings):
|
||||||
if charge_table:
|
if table_type == "locataire":
|
||||||
charge_table += page.extract_table(charge_table_settings)[1:]
|
res = loc_sink.send(line)
|
||||||
else:
|
if res:
|
||||||
charge_table = page.extract_table(charge_table_settings)
|
res.update(additionnal_fields)
|
||||||
|
yield locataire.Line(**res)
|
||||||
|
elif table_type == "charge":
|
||||||
|
res = charge_sink.send(line)
|
||||||
|
if res:
|
||||||
|
res.update(additionnal_fields)
|
||||||
|
yield charge.Line(**res)
|
||||||
|
|
||||||
df_charge = extract_charge(charge_table)
|
elif table_type == "patrimoine":
|
||||||
df_charge_with_1st = pd.concat([df_1st_charge, df_charge])
|
res = patrimoine_sink.send(line)
|
||||||
df_charge_with_1st.to_excel(charge_dest, sheet_name="Charges", index=False)
|
if res:
|
||||||
logging.info(f"{charge_dest} saved")
|
res.update(additionnal_fields)
|
||||||
|
yield patrimoine.Line(**res)
|
||||||
df_loc = extract_situation_loc(loc_tables, mois=mois, annee=annee)
|
|
||||||
df_loc = df_loc.assign()
|
|
||||||
df_loc.to_excel(location_dest, sheet_name="Location", index=False)
|
|
||||||
logging.info(f"{location_dest} saved")
|
|
||||||
|
|
||||||
|
|
||||||
def extract_save(pdf_file, dest):
|
def from_pdf(pdf_file):
|
||||||
|
"""Build dataframes one about charges and another on loc"""
|
||||||
|
pdf = pdfplumber.open(pdf_file)
|
||||||
|
locataire_lines = []
|
||||||
|
charge_lines = []
|
||||||
|
patrimoine_lines = []
|
||||||
|
for line in pdf_extract_tables_lines(pdf):
|
||||||
|
if isinstance(line, locataire.Line):
|
||||||
|
locataire_lines.append(line)
|
||||||
|
elif isinstance(line, charge.Line):
|
||||||
|
charge_lines.append(line)
|
||||||
|
elif isinstance(line, patrimoine.Line):
|
||||||
|
patrimoine_lines.append(line)
|
||||||
|
else:
|
||||||
|
logging.warning(f"Page {page_number+1} non reconnu. Page ignorée.")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"charge": pd.DataFrame([c.__dict__ for c in charge_lines]),
|
||||||
|
"locataire": pd.DataFrame([c.__dict__ for c in locataire_lines]),
|
||||||
|
"patrimoine": pd.DataFrame([c.__dict__ for c in patrimoine_lines]),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_plan(pdf_file, dest):
|
||||||
|
return {
|
||||||
|
"charge": Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_charge.xlsx",
|
||||||
|
"locataire": Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_locataire.xlsx",
|
||||||
|
"patrimoine": Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_patrimoine.xlsx",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def extract_save(pdf_file, dest, save=[]):
|
||||||
"""Extract charge and locataire for pdf_file and put xlsx file in dest"""
|
"""Extract charge and locataire for pdf_file and put xlsx file in dest"""
|
||||||
pdf_file = Path(pdf_file)
|
pdf_file = Path(pdf_file)
|
||||||
xls_charge = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_charge.xlsx"
|
xlss = extract_plan(pdf_file, dest)
|
||||||
xls_locataire = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_locataire.xlsx"
|
|
||||||
|
|
||||||
pdf = pdfplumber.open(pdf_file)
|
dfs = from_pdf(pdf_file)
|
||||||
extract_from_pdf(pdf, xls_charge, xls_locataire)
|
|
||||||
|
for s in save:
|
||||||
|
dfs[s].to_excel(xlss[s], sheet_name=s, index=False)
|
||||||
|
logging.info(f"{xlss[s]} saved")
|
||||||
|
|
||||||
|
return {k: v for k, v in xlss.items() if k in save}
|
||||||
|
@@ -1,68 +0,0 @@
|
|||||||
import logging
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
|
|
||||||
def get_lot(x):
|
|
||||||
"""Return lot number from "RECAPITULATIF DES OPERATIONS" """
|
|
||||||
if x[:2].isdigit():
|
|
||||||
return x[:2]
|
|
||||||
if x[:1].isdigit():
|
|
||||||
return "0" + x[:1]
|
|
||||||
if x[:2] == "PC":
|
|
||||||
return "PC"
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def extract_charge(table):
|
|
||||||
"""From pdfplumber table extract the charge dataframe"""
|
|
||||||
df = (
|
|
||||||
pd.DataFrame(table[1:], columns=table[0])
|
|
||||||
.replace("", np.nan)
|
|
||||||
.dropna(subset=["Débits", "Crédits"], how="all")
|
|
||||||
)
|
|
||||||
|
|
||||||
drop_index = df[
|
|
||||||
df["RECAPITULATIF DES OPERATIONS"].str.contains("TOTAUX", case=False)
|
|
||||||
| df["RECAPITULATIF DES OPERATIONS"].str.contains("Solde créditeur", case=False)
|
|
||||||
| df["RECAPITULATIF DES OPERATIONS"].str.contains("Solde débiteur", case=False)
|
|
||||||
| df["RECAPITULATIF DES OPERATIONS"].str.contains(
|
|
||||||
"Total des reglements locataires", case=False
|
|
||||||
)
|
|
||||||
].index
|
|
||||||
df.drop(drop_index, inplace=True)
|
|
||||||
|
|
||||||
df[""].mask(
|
|
||||||
df["RECAPITULATIF DES OPERATIONS"].str.contains("honoraires", case=False),
|
|
||||||
"IMI GERANCE",
|
|
||||||
inplace=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
df = df.assign(lot=df["RECAPITULATIF DES OPERATIONS"].map(get_lot))
|
|
||||||
|
|
||||||
df = df.astype(
|
|
||||||
{
|
|
||||||
"Débits": "float64",
|
|
||||||
"Crédits": "float64",
|
|
||||||
"Dont T.V.A.": "float64",
|
|
||||||
"Locatif": "float64",
|
|
||||||
"Déductible": "float64",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
df.columns.values[0] = "Fournisseur"
|
|
||||||
return df
|
|
||||||
|
|
||||||
|
|
||||||
def extract_remise_com(table):
|
|
||||||
"""Extract "remise commercial" from first page"""
|
|
||||||
df = pd.DataFrame(table[1:], columns=table[0]).replace("", np.nan)
|
|
||||||
df = df[
|
|
||||||
df["RECAPITULATIF DES OPERATIONS"].str.contains(
|
|
||||||
"Remise commerciale gérance", case=False, na=False
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
df.columns.values[0] = "Fournisseur"
|
|
||||||
return df
|
|
@@ -1,81 +0,0 @@
|
|||||||
import logging
|
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
|
|
||||||
def parse_above_loc(content):
|
|
||||||
row = {}
|
|
||||||
app, loc, *_ = content.split("\n")
|
|
||||||
app_ = app.split(" ")
|
|
||||||
row["lot"] = f"{int(app_[1]):02d}"
|
|
||||||
row["type"] = " ".join(app_[2:])
|
|
||||||
row["locataire"] = loc
|
|
||||||
return pd.Series(row)
|
|
||||||
|
|
||||||
|
|
||||||
def join_row(last, next):
|
|
||||||
row = []
|
|
||||||
for i in range(len(last)):
|
|
||||||
if last[i] and next[i]:
|
|
||||||
row.append(f"{last[i]}\n{next[i]}")
|
|
||||||
elif last[i]:
|
|
||||||
row.append(last[i])
|
|
||||||
elif next[i]:
|
|
||||||
row.append(next[i])
|
|
||||||
else:
|
|
||||||
row.append("")
|
|
||||||
return row
|
|
||||||
|
|
||||||
|
|
||||||
def join_tables(tables):
|
|
||||||
|
|
||||||
joined = tables[0]
|
|
||||||
|
|
||||||
for t in tables[1:]:
|
|
||||||
last_row = joined[-1]
|
|
||||||
if "Totaux" not in last_row[0]:
|
|
||||||
first_row = t[0]
|
|
||||||
joined_row = join_row(last_row, first_row)
|
|
||||||
joined = joined[:-1] + [joined_row] + t[1:]
|
|
||||||
else:
|
|
||||||
joined += t
|
|
||||||
|
|
||||||
return joined
|
|
||||||
|
|
||||||
|
|
||||||
def extract_situation_loc(tables, mois, annee):
|
|
||||||
"""From pdfplumber table extract locataire df"""
|
|
||||||
table = join_tables(tables)
|
|
||||||
try:
|
|
||||||
df = pd.DataFrame(table[1:], columns=table[0])
|
|
||||||
except IndexError:
|
|
||||||
print(table)
|
|
||||||
rows = []
|
|
||||||
for i, row in df[df["Locataires"] == "Totaux"].iterrows():
|
|
||||||
above_row_loc = df.iloc[i - 1]["Locataires"]
|
|
||||||
up_row = pd.concat(
|
|
||||||
[
|
|
||||||
row,
|
|
||||||
parse_above_loc(above_row_loc),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
rows.append(up_row)
|
|
||||||
df_cleaned = pd.concat(rows, axis=1).T
|
|
||||||
df_cleaned.drop(["Locataires", "", "Période"], axis=1, inplace=True)
|
|
||||||
|
|
||||||
df_cleaned = df_cleaned.astype(
|
|
||||||
{
|
|
||||||
"Loyers": "float64",
|
|
||||||
"Taxes": "float64",
|
|
||||||
"Provisions": "float64",
|
|
||||||
"Divers": "float64",
|
|
||||||
"Total": "float64",
|
|
||||||
"Réglés": "float64",
|
|
||||||
"Impayés": "float64",
|
|
||||||
},
|
|
||||||
errors="ignore",
|
|
||||||
)
|
|
||||||
|
|
||||||
df_cleaned = df_cleaned.assign(mois=mois, annee=annee)
|
|
||||||
return df_cleaned
|
|
@@ -1,30 +1,29 @@
|
|||||||
|
import glob
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
def extract_excel_to_dfs(directory, df_names=["charge", "locataire"]):
|
def join_excel(src, dest, file_pattern):
|
||||||
p = Path(directory)
|
"""Join every excel file in arc respecting file_pattern into on unique file in dist"""
|
||||||
dfs = {name: [] for name in df_names}
|
filenames = list_files(src, file_pattern)
|
||||||
|
logging.debug(f"Concatenate {filenames}")
|
||||||
|
dfs = extract_dfs(filenames)
|
||||||
|
joined_df = pd.concat(dfs)
|
||||||
|
logging.debug(f"Writing joined excel to {dest}")
|
||||||
|
joined_df.to_excel(dest, index=False)
|
||||||
|
logging.debug(f"with {len(joined_df)} rows")
|
||||||
|
|
||||||
for file in p.glob("*.xlsx"):
|
|
||||||
year, month, immeuble, table = file.stem.split("_")
|
|
||||||
df = pd.read_excel(file, dtype={"lot": str}).assign(
|
|
||||||
annee=year, mois=month, immeuble=immeuble[:3]
|
|
||||||
)
|
|
||||||
dfs[table].append(df)
|
|
||||||
|
|
||||||
|
def list_files(src, file_glob):
|
||||||
|
return list(glob.iglob(f"{src}/{file_glob}"))
|
||||||
|
|
||||||
|
|
||||||
|
def extract_dfs(filenames):
|
||||||
|
dfs = []
|
||||||
|
for filename in filenames:
|
||||||
|
logging.debug(f"Extracting {filename}")
|
||||||
|
df = pd.read_excel(filename)
|
||||||
|
logging.debug(f"Found {len(df)} rows")
|
||||||
|
dfs.append(df)
|
||||||
return dfs
|
return dfs
|
||||||
|
|
||||||
|
|
||||||
def join_excel(directory, dest, df_names=["charge", "locataire"]):
|
|
||||||
dfs = extract_excel_to_dfs(directory, df_names)
|
|
||||||
destinations = {}
|
|
||||||
for tablename, datas in dfs.items():
|
|
||||||
df = pd.concat(datas)
|
|
||||||
destination = Path(dest) / f"{tablename}.xlsx"
|
|
||||||
df.to_excel(destination, index=False)
|
|
||||||
destinations[tablename] = destination
|
|
||||||
logging.info(f"{destination} written")
|
|
||||||
return destinations
|
|
||||||
|
1
pdf_oralia/pages/__init__.py
Normal file
1
pdf_oralia/pages/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
from . import charge, locataire, patrimoine, recapitulatif
|
124
pdf_oralia/pages/charge.py
Normal file
124
pdf_oralia/pages/charge.py
Normal file
@@ -0,0 +1,124 @@
|
|||||||
|
import re
|
||||||
|
from pydantic import BaseModel, field_validator
|
||||||
|
|
||||||
|
|
||||||
|
HEADER_CHARGE = [
|
||||||
|
"",
|
||||||
|
"RECAPITULATIF DES OPERATIONS",
|
||||||
|
"Débits",
|
||||||
|
"Crédits",
|
||||||
|
"Dont T.V.A.",
|
||||||
|
"Locatif",
|
||||||
|
"Déductible",
|
||||||
|
]
|
||||||
|
DF_TYPES = {
|
||||||
|
"Fournisseur": str,
|
||||||
|
"RECAPITULATIF DES OPERATIONS": str,
|
||||||
|
"Débits": float,
|
||||||
|
"Crédits": float,
|
||||||
|
"Dont T.V.A.": float,
|
||||||
|
"Locatif": float,
|
||||||
|
"Déductible": float,
|
||||||
|
"immeuble": str,
|
||||||
|
"mois": str,
|
||||||
|
"annee": str,
|
||||||
|
"lot": str,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class Line(BaseModel):
|
||||||
|
mois: int
|
||||||
|
annee: int
|
||||||
|
immeuble: str
|
||||||
|
lot: str
|
||||||
|
Champs: str
|
||||||
|
Categorie: str
|
||||||
|
Fournisseur: str
|
||||||
|
Libellé: str
|
||||||
|
Débit: float
|
||||||
|
Crédits: float
|
||||||
|
Dont_TVA: float
|
||||||
|
Locatif: float
|
||||||
|
Déductible: float
|
||||||
|
|
||||||
|
@field_validator(
|
||||||
|
"Débit", "Crédits", "Dont_TVA", "Locatif", "Déductible", mode="before"
|
||||||
|
)
|
||||||
|
def set_default_if_empty(cls, v):
|
||||||
|
if v == "":
|
||||||
|
return 0
|
||||||
|
return v
|
||||||
|
|
||||||
|
|
||||||
|
def is_it(page_text):
|
||||||
|
if (
|
||||||
|
"RECAPITULATIF DES OPERATIONS" in page_text
|
||||||
|
and "COMPTE RENDU DE GESTION" not in page_text
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def get_lot(txt):
|
||||||
|
"""Return lot number from "RECAPITULATIF DES OPERATIONS" """
|
||||||
|
regex = r"[BSM](\d+)(?=\s*-)"
|
||||||
|
try:
|
||||||
|
result = re.findall(regex, txt)
|
||||||
|
except TypeError:
|
||||||
|
return "*"
|
||||||
|
if result:
|
||||||
|
return "{:02d}".format(int(result[0]))
|
||||||
|
return "*"
|
||||||
|
|
||||||
|
|
||||||
|
def fsm():
|
||||||
|
current_state = "total"
|
||||||
|
row = {}
|
||||||
|
line = yield
|
||||||
|
while True:
|
||||||
|
if line == HEADER_CHARGE:
|
||||||
|
line = yield
|
||||||
|
if current_state == "total":
|
||||||
|
if line[1].lower().split(" ")[0] in ["total", "totaux"]:
|
||||||
|
current_state = "new_champs"
|
||||||
|
line = yield
|
||||||
|
elif current_state == "new_champs":
|
||||||
|
if line[0] != "":
|
||||||
|
current_state = "new_cat_line"
|
||||||
|
row = {"Champs": line[0], "Categorie": "", "Fournisseur": ""}
|
||||||
|
line = yield
|
||||||
|
elif current_state == "new_cat_line":
|
||||||
|
if line[1].lower().split(" ")[0] in ["total", "totaux"]:
|
||||||
|
current_state = "new_champs"
|
||||||
|
line = yield
|
||||||
|
row = {}
|
||||||
|
elif line[2] != "" or line[3] != "":
|
||||||
|
row.update(
|
||||||
|
{
|
||||||
|
"Fournisseur": line[0] if line[0] != "" else row["Fournisseur"],
|
||||||
|
"Libellé": line[1],
|
||||||
|
"lot": get_lot(line[1]),
|
||||||
|
"Débit": line[2],
|
||||||
|
"Crédits": line[3],
|
||||||
|
"Dont_TVA": line[4],
|
||||||
|
"Locatif": line[5],
|
||||||
|
"Déductible": line[6],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
line = yield row
|
||||||
|
row = {
|
||||||
|
"Champs": row["Champs"],
|
||||||
|
"Categorie": row["Categorie"],
|
||||||
|
"Fournisseur": row["Fournisseur"],
|
||||||
|
}
|
||||||
|
elif line[0] != "" and line[1] == "":
|
||||||
|
row.update({"Categorie": line[0]})
|
||||||
|
line = yield
|
||||||
|
elif line[1] != "":
|
||||||
|
row.update({"Categorie": line[1]})
|
||||||
|
line = yield
|
||||||
|
elif line[0] != "":
|
||||||
|
row.update({"Fournisseur": line[0]})
|
||||||
|
line = yield
|
||||||
|
else:
|
||||||
|
line = yield
|
106
pdf_oralia/pages/locataire.py
Normal file
106
pdf_oralia/pages/locataire.py
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
from pydantic import BaseModel, field_validator
|
||||||
|
|
||||||
|
HEADER_LOC = [
|
||||||
|
"Locataires",
|
||||||
|
"Période",
|
||||||
|
"Loyers",
|
||||||
|
"Taxes",
|
||||||
|
"Provisions",
|
||||||
|
"Divers",
|
||||||
|
"",
|
||||||
|
"Total",
|
||||||
|
"Réglés",
|
||||||
|
"Impayés",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class Line(BaseModel):
|
||||||
|
mois: int
|
||||||
|
annee: int
|
||||||
|
immeuble: str
|
||||||
|
Lot: str
|
||||||
|
Type: str
|
||||||
|
Locataire: str
|
||||||
|
Loyers: float
|
||||||
|
Taxes: float
|
||||||
|
Provisions: float
|
||||||
|
Divers: float
|
||||||
|
Total: float
|
||||||
|
Réglés: float
|
||||||
|
Impayés: float
|
||||||
|
|
||||||
|
@field_validator(
|
||||||
|
"Loyers",
|
||||||
|
"Taxes",
|
||||||
|
"Provisions",
|
||||||
|
"Divers",
|
||||||
|
"Total",
|
||||||
|
"Réglés",
|
||||||
|
"Impayés",
|
||||||
|
mode="before",
|
||||||
|
)
|
||||||
|
def set_default_if_empty(cls, v):
|
||||||
|
if v == "":
|
||||||
|
return 0
|
||||||
|
return v
|
||||||
|
|
||||||
|
|
||||||
|
def is_it(page_text):
|
||||||
|
if "SITUATION DES LOCATAIRES" in page_text:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def parse_lot(string):
|
||||||
|
words = string.split(" ")
|
||||||
|
return {"Lot": "{:02d}".format(int(words[1])), "Type": " ".join(words[2:])}
|
||||||
|
|
||||||
|
|
||||||
|
def fsm():
|
||||||
|
current_state = "new_row"
|
||||||
|
row = {}
|
||||||
|
line = yield
|
||||||
|
while True:
|
||||||
|
if line == HEADER_LOC:
|
||||||
|
line = yield
|
||||||
|
elif current_state == "new_row":
|
||||||
|
if line[0] != "" and line[0] != "TOTAUX":
|
||||||
|
row.update(parse_lot(line[0]))
|
||||||
|
current_state = "add_loc"
|
||||||
|
line = yield
|
||||||
|
elif current_state == "add_loc":
|
||||||
|
if line[0] != "":
|
||||||
|
row["Locataire"] = line[0]
|
||||||
|
current_state = "add_totaux"
|
||||||
|
line = yield
|
||||||
|
elif current_state == "add_totaux":
|
||||||
|
if line[0] == "Totaux":
|
||||||
|
if line[6] is None:
|
||||||
|
row.update(
|
||||||
|
{
|
||||||
|
"Loyers": line[2],
|
||||||
|
"Taxes": line[3],
|
||||||
|
"Provisions": line[4],
|
||||||
|
"Divers": line[5],
|
||||||
|
"Total": line[7],
|
||||||
|
"Réglés": line[8],
|
||||||
|
"Impayés": line[9],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
row.update(
|
||||||
|
{
|
||||||
|
"Loyers": line[2],
|
||||||
|
"Taxes": line[3],
|
||||||
|
"Provisions": line[4],
|
||||||
|
"Divers": line[5],
|
||||||
|
"Total": line[6],
|
||||||
|
"Réglés": line[7],
|
||||||
|
"Impayés": line[8],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
line = yield row
|
||||||
|
row = {}
|
||||||
|
current_state = "new_row"
|
||||||
|
else:
|
||||||
|
line = yield
|
74
pdf_oralia/pages/patrimoine.py
Normal file
74
pdf_oralia/pages/patrimoine.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
from pydantic import BaseModel, field_validator
|
||||||
|
|
||||||
|
HEADER_PATRIMOINE = [
|
||||||
|
"Etage",
|
||||||
|
"Lots",
|
||||||
|
"Type de lot",
|
||||||
|
"Nom du Locataire",
|
||||||
|
"Loyer Annuel",
|
||||||
|
"Début Bail",
|
||||||
|
"Fin Bail",
|
||||||
|
"Entrée",
|
||||||
|
"Départ",
|
||||||
|
"Révisé le",
|
||||||
|
"U",
|
||||||
|
"Dépôt Gar.",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
class Line(BaseModel):
|
||||||
|
mois: int
|
||||||
|
annee: int
|
||||||
|
immeuble: str
|
||||||
|
Etage: str
|
||||||
|
Lot: str
|
||||||
|
Type: str
|
||||||
|
Locataire: str
|
||||||
|
Loyer_annuel: int
|
||||||
|
Debut_bail: str
|
||||||
|
Fin_bail: str
|
||||||
|
Entree: str
|
||||||
|
Depart: str
|
||||||
|
Revision_bail: str
|
||||||
|
Usage: str
|
||||||
|
Depot_garantie: float
|
||||||
|
|
||||||
|
@field_validator("Loyer_annuel", "Depot_garantie", mode="before")
|
||||||
|
def set_default_if_empty(cls, v):
|
||||||
|
if v == "":
|
||||||
|
return 0
|
||||||
|
return v
|
||||||
|
|
||||||
|
|
||||||
|
def is_it(page_text):
|
||||||
|
if "VOTRE PATRIMOINE" in page_text:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def fsm():
|
||||||
|
current_state = "new_line"
|
||||||
|
row = {}
|
||||||
|
line = yield
|
||||||
|
while True:
|
||||||
|
if line == HEADER_PATRIMOINE:
|
||||||
|
line = yield
|
||||||
|
if current_state == "new_line":
|
||||||
|
if line[0] != "":
|
||||||
|
row = {
|
||||||
|
"Etage": line[0],
|
||||||
|
"Lot": line[1][-2:] if line[1] != "" else row["Lot"],
|
||||||
|
"Type": line[2] if line[2] != "" else row["Type"],
|
||||||
|
"Locataire": line[3],
|
||||||
|
"Loyer_annuel": line[4].replace(" ", ""),
|
||||||
|
"Debut_bail": line[5],
|
||||||
|
"Fin_bail": line[6],
|
||||||
|
"Entree": line[7],
|
||||||
|
"Depart": line[8],
|
||||||
|
"Revision_bail": line[9],
|
||||||
|
"Usage": line[10],
|
||||||
|
"Depot_garantie": line[11].replace(" ", ""),
|
||||||
|
}
|
||||||
|
line = yield row
|
||||||
|
else:
|
||||||
|
line = yield
|
34
pdf_oralia/pages/recapitulatif.py
Normal file
34
pdf_oralia/pages/recapitulatif.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def is_it(page_text):
|
||||||
|
if "COMPTE RENDU DE GESTION" in page_text:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def extract(table, additionnal_fields: dict = {}):
|
||||||
|
"""Extract "remise commercial" from first page"""
|
||||||
|
extracted = []
|
||||||
|
header = table[0]
|
||||||
|
for row in table[1:]:
|
||||||
|
if "Remise commerciale gérance" in row:
|
||||||
|
r = dict()
|
||||||
|
for i, value in enumerate(row):
|
||||||
|
r[header[i]] = value
|
||||||
|
for k, v in additionnal_fields.items():
|
||||||
|
r[k] = v
|
||||||
|
extracted.append(r)
|
||||||
|
|
||||||
|
return extracted
|
||||||
|
|
||||||
|
# df = pd.DataFrame(table[1:], columns=table[0]).replace("", np.nan)
|
||||||
|
# df = df[
|
||||||
|
# df["RECAPITULATIF DES OPERATIONS"].str.contains(
|
||||||
|
# "Remise commerciale gérance", case=False, na=False
|
||||||
|
# )
|
||||||
|
# ]
|
||||||
|
#
|
||||||
|
# df.columns.values[0] = "Fournisseur"
|
||||||
|
# return df
|
@@ -3,33 +3,35 @@ from logging.config import dictConfig
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import click
|
import click
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from .extract import extract_save
|
from .extract import extract_save, extract_plan
|
||||||
from .join import join_excel
|
from .join import join_excel
|
||||||
|
|
||||||
logging_config = dict(
|
|
||||||
version=1,
|
|
||||||
formatters={"f": {"format": "%(levelname)-8s %(name)-12s %(message)s"}},
|
|
||||||
handlers={
|
|
||||||
"h": {
|
|
||||||
"class": "logging.StreamHandler",
|
|
||||||
"formatter": "f",
|
|
||||||
"level": logging.DEBUG,
|
|
||||||
}
|
|
||||||
},
|
|
||||||
root={
|
|
||||||
"handlers": ["h"],
|
|
||||||
"level": logging.DEBUG,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
dictConfig(logging_config)
|
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
def main():
|
@click.option("--debug/--no-debug", default=False)
|
||||||
pass
|
def main(debug):
|
||||||
|
if debug:
|
||||||
|
logging_level = logging.DEBUG
|
||||||
|
else:
|
||||||
|
logging_level = logging.INFO
|
||||||
|
logging_config = dict(
|
||||||
|
version=1,
|
||||||
|
formatters={"f": {"format": "%(levelname)-8s %(name)-12s %(message)s"}},
|
||||||
|
handlers={
|
||||||
|
"h": {
|
||||||
|
"class": "logging.StreamHandler",
|
||||||
|
"formatter": "f",
|
||||||
|
"level": logging_level,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
root={
|
||||||
|
"handlers": ["h"],
|
||||||
|
"level": logging_level,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
dictConfig(logging_config)
|
||||||
|
|
||||||
|
|
||||||
@main.group()
|
@main.group()
|
||||||
@@ -40,31 +42,113 @@ def extract():
|
|||||||
@extract.command()
|
@extract.command()
|
||||||
@click.argument("pdf_file", required=1)
|
@click.argument("pdf_file", required=1)
|
||||||
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
||||||
def on(pdf_file, dest):
|
@click.option(
|
||||||
|
"--only-plan",
|
||||||
|
help="Ne produit rien mais indique les changements",
|
||||||
|
default=False,
|
||||||
|
is_flag=True,
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--force",
|
||||||
|
help="Écrase les fichiers produits précédemment",
|
||||||
|
default=False,
|
||||||
|
is_flag=True,
|
||||||
|
)
|
||||||
|
def on(pdf_file, dest, force, only_plan):
|
||||||
|
pdf_file = Path(pdf_file)
|
||||||
if not dest:
|
if not dest:
|
||||||
pdf_path = Path(pdf_file)
|
pdf_path = Path(pdf_file)
|
||||||
dest = pdf_path.parent
|
dest = pdf_path.parent
|
||||||
|
else:
|
||||||
|
dest = Path(dest)
|
||||||
|
|
||||||
extract_save(pdf_file, dest)
|
assert pdf_file.exists()
|
||||||
|
logging.info(f"Found {pdf_file}")
|
||||||
|
|
||||||
|
plan_dest = extract_plan(pdf_file, dest)
|
||||||
|
save = []
|
||||||
|
for k, p in plan_dest.items():
|
||||||
|
if not p.exists() or force:
|
||||||
|
save.append(k)
|
||||||
|
|
||||||
|
if only_plan:
|
||||||
|
for s in save:
|
||||||
|
logging.info(f"Planing to create {plan_dest[s]}")
|
||||||
|
else:
|
||||||
|
dest.mkdir(parents=True, exist_ok=True)
|
||||||
|
extract_save(pdf_file, dest, save)
|
||||||
|
|
||||||
|
|
||||||
@extract.command()
|
@extract.command()
|
||||||
@click.option("--src", help="Tous les fichiers dans folder", default="./")
|
@click.option(
|
||||||
|
"--src", help="Tous les fichiers dans folder (de façon récursive)", default="./"
|
||||||
|
)
|
||||||
@click.option("--dest", help="Où mettre les fichiers produits", default="./")
|
@click.option("--dest", help="Où mettre les fichiers produits", default="./")
|
||||||
def all(src, dest):
|
@click.option(
|
||||||
p = Path(src)
|
"--only-plan",
|
||||||
|
help="Ne produit rien mais indique les changements",
|
||||||
|
default=False,
|
||||||
|
is_flag=True,
|
||||||
|
)
|
||||||
|
@click.option(
|
||||||
|
"--force",
|
||||||
|
help="Écrase les fichiers produits précédemment",
|
||||||
|
default=False,
|
||||||
|
is_flag=True,
|
||||||
|
)
|
||||||
|
def all(src, dest, force, only_plan):
|
||||||
|
src_path = Path(src)
|
||||||
|
|
||||||
d = Path(dest)
|
dest = Path(dest)
|
||||||
d.mkdir(exist_ok=True)
|
dest.mkdir(exist_ok=True)
|
||||||
|
|
||||||
pdf_files = [x for x in p.iterdir() if ".pdf" in str(x)]
|
for pdf_file in src_path.rglob("**/*.pdf"):
|
||||||
for pdf_file in pdf_files:
|
relative_path = pdf_file.relative_to(src_path)
|
||||||
|
files_dest = dest / relative_path.parent
|
||||||
logging.info(f"Found {pdf_file}")
|
logging.info(f"Found {pdf_file}")
|
||||||
extract_save(pdf_file, d)
|
|
||||||
|
plan_dest = extract_plan(pdf_file, files_dest)
|
||||||
|
save = []
|
||||||
|
for k, p in plan_dest.items():
|
||||||
|
if not p.exists() or force:
|
||||||
|
save.append(k)
|
||||||
|
|
||||||
|
if only_plan:
|
||||||
|
for s in save:
|
||||||
|
logging.info(f"Planing to create {plan_dest[s]}")
|
||||||
|
else:
|
||||||
|
files_dest.mkdir(parents=True, exist_ok=True)
|
||||||
|
extract_save(pdf_file, files_dest, save)
|
||||||
|
|
||||||
|
|
||||||
@main.command()
|
@main.command()
|
||||||
@click.option("--src", help="Tous les fichiers dans src", default="./")
|
@click.option("--src", help="Tous les fichiers dans src", default="./")
|
||||||
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
@click.option("--dest", help="Où mettre les fichiers produits", default="")
|
||||||
def join(src, dest):
|
@click.option(
|
||||||
join_excel(src, dest, df_names=["charge", "locataire"])
|
"--force",
|
||||||
|
help="Ecraser si le ficher destination existe.",
|
||||||
|
default=False,
|
||||||
|
is_flag=True,
|
||||||
|
)
|
||||||
|
def join(src, dest, force):
|
||||||
|
"""Join tous les fichiers excel charge (resp locataire) de src dans un seul fichier charge.xlsx dans dist.
|
||||||
|
|
||||||
|
Exemple:
|
||||||
|
|
||||||
|
pdf-oralia join --src <dossier_source> --dest <dossier_destination>
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
dest_charge = f"{dest}/charge.xlsx"
|
||||||
|
if not force and Path(dest_charge).exists():
|
||||||
|
raise ValueError(f"The file {dest_charge} already exists")
|
||||||
|
dest_locataire = f"{dest}/locataire.xlsx"
|
||||||
|
if not force and Path(dest_locataire).exists():
|
||||||
|
raise ValueError(f"The file {dest_locataire} already exists")
|
||||||
|
|
||||||
|
if not Path(src).exists():
|
||||||
|
raise ValueError(f"The source directory ({src}) does not exists.")
|
||||||
|
join_excel(src, dest_charge, "*_charge.xlsx")
|
||||||
|
logging.info(f"Les données charges ont été concaténées dans {dest_charge}")
|
||||||
|
join_excel(src, dest_locataire, "*_locataire.xlsx")
|
||||||
|
logging.info(f"Les données locataires ont été concaténées dans {dest_locataire}")
|
||||||
|
4516
poetry.lock
generated
4516
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "pdf-oralia"
|
name = "pdf-oralia"
|
||||||
version = "{{VERSION_PLACEHOLDER}}"
|
version = "0"
|
||||||
description = ""
|
description = ""
|
||||||
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
|
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
@@ -13,13 +13,14 @@ pdf-oralia = "pdf_oralia.scripts:main"
|
|||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
click = "^8.1.3"
|
click = "^8.1.3"
|
||||||
pdfplumber = "^0.7.4"
|
pdfplumber = "^0.7.4"
|
||||||
pandas = "^1.5.0"
|
pandas = "^2.2.3"
|
||||||
openpyxl = "^3.0.10"
|
openpyxl = "^3.0.10"
|
||||||
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
pre-commit = "^2.20.0"
|
pre-commit = "^2.20.0"
|
||||||
jupyter = "^1.0.0"
|
jupyter = "^1.0.0"
|
||||||
|
tabulate = "^0.9.0"
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = ["poetry-core"]
|
requires = ["poetry-core"]
|
||||||
|
2
renovate.json
Normal file
2
renovate.json
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
{
|
||||||
|
}
|
@@ -1,76 +1,5 @@
|
|||||||
argon2-cffi==21.3.0
|
pdfplumber
|
||||||
argon2-cffi-bindings==21.2.0
|
numpy
|
||||||
asttokens==2.0.8
|
pandas
|
||||||
attrs==22.1.0
|
click
|
||||||
backcall==0.2.0
|
openpyxl
|
||||||
beautifulsoup4==4.11.1
|
|
||||||
bleach==5.0.1
|
|
||||||
cffi==1.15.1
|
|
||||||
charset-normalizer==2.1.1
|
|
||||||
cryptography==38.0.1
|
|
||||||
debugpy==1.6.3
|
|
||||||
decorator==5.1.1
|
|
||||||
defusedxml==0.7.1
|
|
||||||
entrypoints==0.4
|
|
||||||
et-xmlfile==1.1.0
|
|
||||||
executing==1.1.0
|
|
||||||
fastjsonschema==2.16.2
|
|
||||||
ipykernel==6.16.0
|
|
||||||
ipython==8.5.0
|
|
||||||
ipython-genutils==0.2.0
|
|
||||||
ipywidgets==8.0.2
|
|
||||||
jedi==0.18.1
|
|
||||||
Jinja2==3.1.2
|
|
||||||
jsonschema==4.16.0
|
|
||||||
jupyter==1.0.0
|
|
||||||
jupyter-console==6.4.4
|
|
||||||
jupyter-core==4.11.1
|
|
||||||
jupyter_client==7.3.5
|
|
||||||
jupyterlab-pygments==0.2.2
|
|
||||||
jupyterlab-widgets==3.0.3
|
|
||||||
lxml==4.9.1
|
|
||||||
MarkupSafe==2.1.1
|
|
||||||
matplotlib-inline==0.1.6
|
|
||||||
mistune==2.0.4
|
|
||||||
nbclient==0.6.8
|
|
||||||
nbconvert==7.0.0
|
|
||||||
nbformat==5.6.1
|
|
||||||
nest-asyncio==1.5.5
|
|
||||||
notebook==6.4.12
|
|
||||||
numpy==1.23.3
|
|
||||||
openpyxl==3.0.10
|
|
||||||
packaging==21.3
|
|
||||||
pandas==1.5.0
|
|
||||||
pandocfilters==1.5.0
|
|
||||||
parso==0.8.3
|
|
||||||
pdfminer.six==20220524
|
|
||||||
pdfplumber==0.7.4
|
|
||||||
pexpect==4.8.0
|
|
||||||
pickleshare==0.7.5
|
|
||||||
Pillow==9.2.0
|
|
||||||
prometheus-client==0.14.1
|
|
||||||
prompt-toolkit==3.0.31
|
|
||||||
psutil==5.9.2
|
|
||||||
ptyprocess==0.7.0
|
|
||||||
pure-eval==0.2.2
|
|
||||||
pycparser==2.21
|
|
||||||
Pygments==2.13.0
|
|
||||||
pyparsing==3.0.9
|
|
||||||
pyrsistent==0.18.1
|
|
||||||
python-dateutil==2.8.2
|
|
||||||
pytz==2022.2.1
|
|
||||||
pyzmq==24.0.1
|
|
||||||
qtconsole==5.3.2
|
|
||||||
QtPy==2.2.0
|
|
||||||
Send2Trash==1.8.0
|
|
||||||
six==1.16.0
|
|
||||||
soupsieve==2.3.2.post1
|
|
||||||
stack-data==0.5.1
|
|
||||||
terminado==0.15.0
|
|
||||||
tinycss2==1.1.1
|
|
||||||
tornado==6.2
|
|
||||||
traitlets==5.4.0
|
|
||||||
Wand==0.6.10
|
|
||||||
wcwidth==0.2.5
|
|
||||||
webencodings==0.5.1
|
|
||||||
widgetsnbextension==4.0.3
|
|
||||||
|
Reference in New Issue
Block a user