Compare commits
5 Commits
c56241fe4c
...
2761c3ed7b
Author | SHA1 | Date | |
---|---|---|---|
2761c3ed7b | |||
5692898137 | |||
44d4150910 | |||
223f25130d | |||
1a86b7bc26 |
@ -12,7 +12,7 @@ steps:
|
|||||||
image: python:3.11
|
image: python:3.11
|
||||||
commands:
|
commands:
|
||||||
- echo ${DRONE_TAG}
|
- echo ${DRONE_TAG}
|
||||||
- sed -i "s/VERSION_PLACEHOLDER/${DRONE_TAG}/g" pyproject.toml
|
- sed -i 's/version = "[^"]*"/version = "${DRONE_TAG}"/g' pyproject.toml
|
||||||
- curl -sSL https://install.python-poetry.org | python3 -
|
- curl -sSL https://install.python-poetry.org | python3 -
|
||||||
- export PATH="/root/.local/bin:$PATH"
|
- export PATH="/root/.local/bin:$PATH"
|
||||||
- poetry --version
|
- poetry --version
|
||||||
|
@ -3,7 +3,20 @@ import re
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
RECAPITULATIF_DES_OPERATION = 1
|
RECAPITULATIF_DES_OPERATIONS = 1
|
||||||
|
DF_TYPES = {
|
||||||
|
"Fournisseur": str,
|
||||||
|
"RECAPITULATIF DES OPERATIONS": str,
|
||||||
|
"Débits": float,
|
||||||
|
"Crédits": float,
|
||||||
|
"Dont T.V.A.": float,
|
||||||
|
"Locatif": float,
|
||||||
|
"Déductible": float,
|
||||||
|
"immeuble": str,
|
||||||
|
"mois": str,
|
||||||
|
"annee": str,
|
||||||
|
"lot": str,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def is_it(page_text):
|
def is_it(page_text):
|
||||||
@ -17,7 +30,7 @@ def is_it(page_text):
|
|||||||
|
|
||||||
def get_lot(txt):
|
def get_lot(txt):
|
||||||
"""Return lot number from "RECAPITULATIF DES OPERATIONS" """
|
"""Return lot number from "RECAPITULATIF DES OPERATIONS" """
|
||||||
regex = r"[BSM](\d+)\s-"
|
regex = r"[BSM](\d+)(?=\s*-)"
|
||||||
result = re.findall(regex, txt)
|
result = re.findall(regex, txt)
|
||||||
if result:
|
if result:
|
||||||
return "{:02d}".format(int(result[0]))
|
return "{:02d}".format(int(result[0]))
|
||||||
@ -27,14 +40,14 @@ def get_lot(txt):
|
|||||||
def keep_row(row):
|
def keep_row(row):
|
||||||
return not any(
|
return not any(
|
||||||
[
|
[
|
||||||
word.lower() in row[RECAPITULATIF_DES_OPERATION].lower()
|
word.lower() in row[RECAPITULATIF_DES_OPERATIONS].lower()
|
||||||
for word in ["TOTAL", "TOTAUX", "Solde créditeur", "Solde débiteur"]
|
for word in ["TOTAL", "TOTAUX", "Solde créditeur", "Solde débiteur"]
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract(table, additionnal_fields: dict = {}):
|
def extract(table, additionnal_fields: dict = {}):
|
||||||
"""Turn table to dictionary with additionnal fields"""
|
"""Turn table to dictionary with additional fields"""
|
||||||
extracted = []
|
extracted = []
|
||||||
header = table[0]
|
header = table[0]
|
||||||
for row in table[1:]:
|
for row in table[1:]:
|
||||||
@ -49,9 +62,7 @@ def extract(table, additionnal_fields: dict = {}):
|
|||||||
for k, v in additionnal_fields.items():
|
for k, v in additionnal_fields.items():
|
||||||
r[k] = v
|
r[k] = v
|
||||||
|
|
||||||
r["lot"] = get_lot(row[RECAPITULATIF_DES_OPERATION])
|
if "honoraire" in row[RECAPITULATIF_DES_OPERATIONS]:
|
||||||
|
|
||||||
if "honoraire" in row[RECAPITULATIF_DES_OPERATION]:
|
|
||||||
r["Fournisseur"] = "IMI GERANCE"
|
r["Fournisseur"] = "IMI GERANCE"
|
||||||
|
|
||||||
extracted.append(r)
|
extracted.append(r)
|
||||||
@ -69,4 +80,9 @@ def table2df(tables):
|
|||||||
)
|
)
|
||||||
df["Fournisseur"] = df["Fournisseur"].fillna(method="ffill")
|
df["Fournisseur"] = df["Fournisseur"].fillna(method="ffill")
|
||||||
dfs.append(df)
|
dfs.append(df)
|
||||||
return pd.concat(dfs)
|
df = pd.concat(dfs)
|
||||||
|
|
||||||
|
df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
|
||||||
|
print(df.columns)
|
||||||
|
df["lot"] = df["RECAPITULATIF DES OPERATIONS"].apply(get_lot)
|
||||||
|
return df.astype(DF_TYPES, errors="ignore")
|
||||||
|
@ -1,5 +1,22 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
DF_TYPES = {
|
||||||
|
"Locataires": str,
|
||||||
|
"Période": str,
|
||||||
|
"Loyers": float,
|
||||||
|
"Taxes": float,
|
||||||
|
"Provisions": float,
|
||||||
|
"Divers": str,
|
||||||
|
"Total": float,
|
||||||
|
"Réglés": float,
|
||||||
|
"Impayés": float,
|
||||||
|
"immeuble": str,
|
||||||
|
"mois": str,
|
||||||
|
"annee": str,
|
||||||
|
"Lot": str,
|
||||||
|
"Type": str,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def is_it(page_text):
|
def is_it(page_text):
|
||||||
if "SITUATION DES LOCATAIRES" in page_text:
|
if "SITUATION DES LOCATAIRES" in page_text:
|
||||||
@ -67,6 +84,12 @@ def parse_lot(string):
|
|||||||
return {"Lot": "{:02d}".format(int(words[1])), "Type": " ".join(words[2:])}
|
return {"Lot": "{:02d}".format(int(words[1])), "Type": " ".join(words[2:])}
|
||||||
|
|
||||||
|
|
||||||
|
def clean_type(string):
|
||||||
|
if "appartement" in string.lower():
|
||||||
|
return string[-2:]
|
||||||
|
return string
|
||||||
|
|
||||||
|
|
||||||
def join_row(table):
|
def join_row(table):
|
||||||
joined = []
|
joined = []
|
||||||
for row in table:
|
for row in table:
|
||||||
@ -116,7 +139,7 @@ def join_row(table):
|
|||||||
)
|
)
|
||||||
joined.append(row)
|
joined.append(row)
|
||||||
else:
|
else:
|
||||||
print(row)
|
pass
|
||||||
|
|
||||||
return joined
|
return joined
|
||||||
|
|
||||||
@ -131,4 +154,9 @@ def flat_tables(tables):
|
|||||||
def table2df(tables):
|
def table2df(tables):
|
||||||
tables = flat_tables(tables)
|
tables = flat_tables(tables)
|
||||||
joined = join_row(tables)
|
joined = join_row(tables)
|
||||||
return pd.DataFrame.from_records(joined)
|
df = pd.DataFrame.from_records(joined)
|
||||||
|
|
||||||
|
df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
|
||||||
|
df["Type"] = df["Type"].apply(clean_type)
|
||||||
|
|
||||||
|
return df.astype(DF_TYPES, errors="ignore")
|
||||||
|
@ -3,10 +3,8 @@ from logging.config import dictConfig
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import click
|
import click
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from .extract import extract_save
|
from .extract import extract_save
|
||||||
from .join import join_excel
|
|
||||||
|
|
||||||
logging_config = dict(
|
logging_config = dict(
|
||||||
version=1,
|
version=1,
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "pdf-oralia"
|
name = "pdf-oralia"
|
||||||
version = "VERSION_PLACEHOLDER"
|
version = "dev"
|
||||||
description = ""
|
description = ""
|
||||||
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
|
authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
Loading…
Reference in New Issue
Block a user