Compare commits

..

No commits in common. "2761c3ed7b4b142f6d63f1b3513d04e52ddaf9e2" and "c56241fe4ce314c43995bf93b00629128f450e7c" have entirely different histories.

5 changed files with 14 additions and 56 deletions

View File

@@ -12,7 +12,7 @@ steps:
image: python:3.11
commands:
- echo ${DRONE_TAG}
- sed -i 's/version = "[^"]*"/version = "${DRONE_TAG}"/g' pyproject.toml
- sed -i "s/VERSION_PLACEHOLDER/${DRONE_TAG}/g" pyproject.toml
- curl -sSL https://install.python-poetry.org | python3 -
- export PATH="/root/.local/bin:$PATH"
- poetry --version

View File

@@ -3,20 +3,7 @@ import re
import numpy as np
import pandas as pd
RECAPITULATIF_DES_OPERATIONS = 1
DF_TYPES = {
"Fournisseur": str,
"RECAPITULATIF DES OPERATIONS": str,
"Débits": float,
"Crédits": float,
"Dont T.V.A.": float,
"Locatif": float,
"Déductible": float,
"immeuble": str,
"mois": str,
"annee": str,
"lot": str,
}
RECAPITULATIF_DES_OPERATION = 1
def is_it(page_text):
@@ -30,7 +17,7 @@ def is_it(page_text):
def get_lot(txt):
"""Return lot number from "RECAPITULATIF DES OPERATIONS" """
regex = r"[BSM](\d+)(?=\s*-)"
regex = r"[BSM](\d+)\s-"
result = re.findall(regex, txt)
if result:
return "{:02d}".format(int(result[0]))
@@ -40,14 +27,14 @@ def get_lot(txt):
def keep_row(row):
return not any(
[
word.lower() in row[RECAPITULATIF_DES_OPERATIONS].lower()
word.lower() in row[RECAPITULATIF_DES_OPERATION].lower()
for word in ["TOTAL", "TOTAUX", "Solde créditeur", "Solde débiteur"]
]
)
def extract(table, additionnal_fields: dict = {}):
"""Turn table to dictionary with additional fields"""
"""Turn table to dictionary with additionnal fields"""
extracted = []
header = table[0]
for row in table[1:]:
@@ -62,7 +49,9 @@ def extract(table, additionnal_fields: dict = {}):
for k, v in additionnal_fields.items():
r[k] = v
if "honoraire" in row[RECAPITULATIF_DES_OPERATIONS]:
r["lot"] = get_lot(row[RECAPITULATIF_DES_OPERATION])
if "honoraire" in row[RECAPITULATIF_DES_OPERATION]:
r["Fournisseur"] = "IMI GERANCE"
extracted.append(r)
@@ -80,9 +69,4 @@ def table2df(tables):
)
df["Fournisseur"] = df["Fournisseur"].fillna(method="ffill")
dfs.append(df)
df = pd.concat(dfs)
df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
print(df.columns)
df["lot"] = df["RECAPITULATIF DES OPERATIONS"].apply(get_lot)
return df.astype(DF_TYPES, errors="ignore")
return pd.concat(dfs)

View File

@@ -1,22 +1,5 @@
import pandas as pd
DF_TYPES = {
"Locataires": str,
"Période": str,
"Loyers": float,
"Taxes": float,
"Provisions": float,
"Divers": str,
"Total": float,
"Réglés": float,
"Impayés": float,
"immeuble": str,
"mois": str,
"annee": str,
"Lot": str,
"Type": str,
}
def is_it(page_text):
if "SITUATION DES LOCATAIRES" in page_text:
@@ -84,12 +67,6 @@ def parse_lot(string):
return {"Lot": "{:02d}".format(int(words[1])), "Type": " ".join(words[2:])}
def clean_type(string):
if "appartement" in string.lower():
return string[-2:]
return string
def join_row(table):
joined = []
for row in table:
@@ -139,7 +116,7 @@ def join_row(table):
)
joined.append(row)
else:
pass
print(row)
return joined
@@ -154,9 +131,4 @@ def flat_tables(tables):
def table2df(tables):
tables = flat_tables(tables)
joined = join_row(tables)
df = pd.DataFrame.from_records(joined)
df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
df["Type"] = df["Type"].apply(clean_type)
return df.astype(DF_TYPES, errors="ignore")
return pd.DataFrame.from_records(joined)

View File

@@ -3,8 +3,10 @@ from logging.config import dictConfig
from pathlib import Path
import click
import pandas as pd
from .extract import extract_save
from .join import join_excel
logging_config = dict(
version=1,

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "pdf-oralia"
version = "dev"
version = "VERSION_PLACEHOLDER"
description = ""
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
readme = "README.md"