Feat: improve version name for drone

Feat: lot s'adapte meme sans espace avant le tiret
Feat: remove Appartement in type
2023-06-30 13:51:04 +02:00 · 2023-06-28 10:49:36 +02:00 · 2023-06-28 10:44:56 +02:00 · 2023-06-28 10:30:40 +02:00 · 2023-06-28 09:45:18 +02:00
5 changed files with 56 additions and 14 deletions
--- a/.drone.yml
+++ b/.drone.yml
@@ -12,7 +12,7 @@ steps:
    image: python:3.11
    commands:
      - echo ${DRONE_TAG}
-      - sed -i "s/VERSION_PLACEHOLDER/${DRONE_TAG}/g" pyproject.toml
+      - sed -i 's/version = "[^"]*"/version = "${DRONE_TAG}"/g' pyproject.toml
      - curl -sSL https://install.python-poetry.org | python3 -
      - export PATH="/root/.local/bin:$PATH"
      - poetry --version
--- a/pdf_oralia/pages/charge.py
+++ b/pdf_oralia/pages/charge.py
@@ -3,7 +3,20 @@ import re
 import numpy as np
 import pandas as pd

-RECAPITULATIF_DES_OPERATION = 1
+RECAPITULATIF_DES_OPERATIONS = 1
+DF_TYPES = {
+    "Fournisseur": str,
+    "RECAPITULATIF DES OPERATIONS": str,
+    "Débits": float,
+    "Crédits": float,
+    "Dont T.V.A.": float,
+    "Locatif": float,
+    "Déductible": float,
+    "immeuble": str,
+    "mois": str,
+    "annee": str,
+    "lot": str,
+}


 def is_it(page_text):
@@ -17,7 +30,7 @@ def is_it(page_text):

 def get_lot(txt):
    """Return lot number from "RECAPITULATIF DES OPERATIONS" """
-    regex = r"[BSM](\d+)\s-"
+    regex = r"[BSM](\d+)(?=\s*-)"
    result = re.findall(regex, txt)
    if result:
        return "{:02d}".format(int(result[0]))
@@ -27,14 +40,14 @@ def get_lot(txt):
 def keep_row(row):
    return not any(
        [
-            word.lower() in row[RECAPITULATIF_DES_OPERATION].lower()
+            word.lower() in row[RECAPITULATIF_DES_OPERATIONS].lower()
            for word in ["TOTAL", "TOTAUX", "Solde créditeur", "Solde débiteur"]
        ]
    )


 def extract(table, additionnal_fields: dict = {}):
-    """Turn table to dictionary with additionnal fields"""
+    """Turn table to dictionary with additional fields"""
    extracted = []
    header = table[0]
    for row in table[1:]:
@@ -49,9 +62,7 @@ def extract(table, additionnal_fields: dict = {}):
            for k, v in additionnal_fields.items():
                r[k] = v

-            r["lot"] = get_lot(row[RECAPITULATIF_DES_OPERATION])
-
-            if "honoraire" in row[RECAPITULATIF_DES_OPERATION]:
+            if "honoraire" in row[RECAPITULATIF_DES_OPERATIONS]:
                r["Fournisseur"] = "IMI GERANCE"

            extracted.append(r)
@@ -69,4 +80,9 @@ def table2df(tables):
        )
        df["Fournisseur"] = df["Fournisseur"].fillna(method="ffill")
        dfs.append(df)
-    return pd.concat(dfs)
+    df = pd.concat(dfs)
+
+    df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
+    print(df.columns)
+    df["lot"] = df["RECAPITULATIF DES OPERATIONS"].apply(get_lot)
+    return df.astype(DF_TYPES, errors="ignore")
--- a/pdf_oralia/pages/locataire.py
+++ b/pdf_oralia/pages/locataire.py
@@ -1,5 +1,22 @@
 import pandas as pd

+DF_TYPES = {
+    "Locataires": str,
+    "Période": str,
+    "Loyers": float,
+    "Taxes": float,
+    "Provisions": float,
+    "Divers": str,
+    "Total": float,
+    "Réglés": float,
+    "Impayés": float,
+    "immeuble": str,
+    "mois": str,
+    "annee": str,
+    "Lot": str,
+    "Type": str,
+}
+

 def is_it(page_text):
    if "SITUATION DES LOCATAIRES" in page_text:
@@ -67,6 +84,12 @@ def parse_lot(string):
    return {"Lot": "{:02d}".format(int(words[1])), "Type": " ".join(words[2:])}


+def clean_type(string):
+    if "appartement" in string.lower():
+        return string[-2:]
+    return string
+
+
 def join_row(table):
    joined = []
    for row in table:
@@ -116,7 +139,7 @@ def join_row(table):
                )
                joined.append(row)
            else:
-                print(row)
+                pass

    return joined

@@ -131,4 +154,9 @@ def flat_tables(tables):
 def table2df(tables):
    tables = flat_tables(tables)
    joined = join_row(tables)
-    return pd.DataFrame.from_records(joined)
+    df = pd.DataFrame.from_records(joined)
+
+    df["immeuble"] = df["immeuble"].apply(lambda x: x[0].capitalize())
+    df["Type"] = df["Type"].apply(clean_type)
+
+    return df.astype(DF_TYPES, errors="ignore")
--- a/pdf_oralia/scripts.py
+++ b/pdf_oralia/scripts.py
@@ -3,10 +3,8 @@ from logging.config import dictConfig
 from pathlib import Path

 import click
-import pandas as pd

 from .extract import extract_save
-from .join import join_excel

 logging_config = dict(
    version=1,
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pdf-oralia"
-version = "VERSION_PLACEHOLDER"
+version = "dev"
 description = ""
 authors = ["Bertrand Benjamin <benjamin.bertrand@opytex.org>"]
 readme = "README.md"
Author	SHA1	Message	Date
Bertrand Benjamin	2761c3ed7b	Feat: improve version name for drone	2023-06-30 13:51:04 +02:00
Bertrand Benjamin	5692898137	Feat: lot s'adapte meme sans espace avant le tiret	2023-06-28 10:49:36 +02:00
Bertrand Benjamin	44d4150910	Feat: remove Appartement in type	2023-06-28 10:44:56 +02:00
Bertrand Benjamin	223f25130d	Feat: type df columns	2023-06-28 10:30:40 +02:00
Bertrand Benjamin	1a86b7bc26	Fix: remove useless import	2023-06-28 09:45:18 +02:00