8 Commits

5 changed files with 33 additions and 13 deletions

View File

@@ -12,7 +12,7 @@ steps:
image: python:3.11 image: python:3.11
commands: commands:
- echo ${DRONE_TAG} - echo ${DRONE_TAG}
- sed -i "s/VERSION_PLACEHOLDER/${DRONE_TAG}/g" pyproject.toml - sed -i 's/version = "[^"]*"/version = "${DRONE_TAG}"/g' pyproject.toml
- curl -sSL https://install.python-poetry.org | python3 - - curl -sSL https://install.python-poetry.org | python3 -
- export PATH="/root/.local/bin:$PATH" - export PATH="/root/.local/bin:$PATH"
- poetry --version - poetry --version
@@ -22,6 +22,19 @@ steps:
PYPI_TOKEN: PYPI_TOKEN:
from_secret: pypi_token from_secret: pypi_token
- name: Notify on matrix
image: plugins/matrix
environment:
MATRIX_ROOMID:
from_secret: MATRIX_ROOMID
MATRIX_ACCESSTOKEN:
from_secret: MATRIX_ACCESSTOKEN
MATRIX_USERID:
from_secret: MATRIX_USERID
settings:
homeserver: https://matrix.poneyworld.net
template: "Une nouvelle version (${DRONE_TAG}) de pdf-oralia est publiée!"
when: when:
event: event:
include: include:

View File

@@ -45,7 +45,7 @@ def from_pdf(pdf):
charge_tables = [] charge_tables = []
patrimoie_tables = [] patrimoie_tables = []
for page in pdf.pages: for page_number, page in enumerate(pdf.pages):
page_text = page.extract_text() page_text = page.extract_text()
date = extract_date(page_text) date = extract_date(page_text)
additionnal_fields = { additionnal_fields = {
@@ -76,7 +76,7 @@ def from_pdf(pdf):
pass pass
else: else:
raise ValueError("Page non reconnu") logging.warning(f"Page {page_number+1} non reconnu. Page ignorée.")
df_charge = charge.table2df(recapitulatif_tables + charge_tables) df_charge = charge.table2df(recapitulatif_tables + charge_tables)
df_loc = locataire.table2df(loc_tables) df_loc = locataire.table2df(loc_tables)

View File

@@ -17,6 +17,7 @@ DF_TYPES = {
"annee": str, "annee": str,
"lot": str, "lot": str,
} }
DEFAULT_FOURNISSEUR = "ROSIER MODICA MOTTEROZ SA"
def is_it(page_text): def is_it(page_text):
@@ -31,7 +32,10 @@ def is_it(page_text):
def get_lot(txt): def get_lot(txt):
"""Return lot number from "RECAPITULATIF DES OPERATIONS" """ """Return lot number from "RECAPITULATIF DES OPERATIONS" """
regex = r"[BSM](\d+)(?=\s*-)" regex = r"[BSM](\d+)(?=\s*-)"
try:
result = re.findall(regex, txt) result = re.findall(regex, txt)
except TypeError:
return "*"
if result: if result:
return "{:02d}".format(int(result[0])) return "{:02d}".format(int(result[0]))
return "*" return "*"
@@ -62,8 +66,8 @@ def extract(table, additionnal_fields: dict = {}):
for k, v in additionnal_fields.items(): for k, v in additionnal_fields.items():
r[k] = v r[k] = v
if "honoraire" in row[RECAPITULATIF_DES_OPERATIONS]: if "honoraire" in row[RECAPITULATIF_DES_OPERATIONS].lower():
r["Fournisseur"] = "IMI GERANCE" r["Fournisseur"] = DEFAULT_FOURNISSEUR
extracted.append(r) extracted.append(r)
@@ -83,6 +87,5 @@ def table2df(tables):
df = pd.concat(dfs) df = pd.concat(dfs)
df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize()) df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
print(df.columns)
df["lot"] = df["RECAPITULATIF DES OPERATIONS"].apply(get_lot) df["lot"] = df["RECAPITULATIF DES OPERATIONS"].apply(get_lot)
return df.astype(DF_TYPES, errors="ignore") return df.astype(DF_TYPES)

View File

@@ -1,3 +1,4 @@
import numpy as np
import pandas as pd import pandas as pd
DF_TYPES = { DF_TYPES = {
@@ -33,7 +34,7 @@ def is_drop(row):
def extract(table, additionnal_fields: dict = {}): def extract(table, additionnal_fields: dict = {}):
"""Turn table to dictionary with additionnal fields""" """Turn table to dictionary with additional fields"""
extracted = [] extracted = []
header = table[0] header = table[0]
for row in table[1:]: for row in table[1:]:
@@ -138,8 +139,6 @@ def join_row(table):
} }
) )
joined.append(row) joined.append(row)
else:
pass
return joined return joined
@@ -159,4 +158,9 @@ def table2df(tables):
df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize()) df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
df["Type"] = df["Type"].apply(clean_type) df["Type"] = df["Type"].apply(clean_type)
return df.astype(DF_TYPES, errors="ignore") numeric_cols = [k for k, v in DF_TYPES.items() if v == float]
df[numeric_cols] = df[numeric_cols].replace("", np.nan)
df = df.drop(df[(df["Locataires"] == "") & (df["Période"] == "")].index)
return df.astype(DF_TYPES)

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "pdf-oralia" name = "pdf-oralia"
version = "1.dev" version = "dev"
description = "" description = ""
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"] authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
readme = "README.md" readme = "README.md"