diff --git a/pdf_oralia/extract_locataire.py b/pdf_oralia/extract_locataire.py
index b77987b..d47f27f 100644
--- a/pdf_oralia/extract_locataire.py
+++ b/pdf_oralia/extract_locataire.py
@@ -7,7 +7,7 @@ def parse_above_loc(content):
     row = {}
     app, loc, *_ = content.split("\n")
     app_ = app.split(" ")
-    row["lot"] = app_[1]
+    row["lot"] = f"{int(app_[1]):02d}"
     row["type"] = " ".join(app_[2:])
     row["locataire"] = loc
     return pd.Series(row)
diff --git a/pdf_oralia/scripts.py b/pdf_oralia/scripts.py
index 6e6ba02..54bb58c 100644
--- a/pdf_oralia/scripts.py
+++ b/pdf_oralia/scripts.py
@@ -73,7 +73,9 @@ def join(src, dest):
     }
     for file in p.glob("*.xlsx"):
         year, month, immeuble, table = file.stem.split("_")
-        df = pd.read_excel(file).assign(annee=year, mois=month, immeuble=immeuble[:3])
+        df = pd.read_excel(file, dtype={"lot": str}).assign(
+            annee=year, mois=month, immeuble=immeuble[:3]
+        )
         dfs[table].append(df)
     for tablename, datas in dfs.items():
         df = pd.concat(datas)