import re

import numpy as np
import pandas as pd

# One of the letters B, S or M, immediately followed by the lot digits
# (captured), exactly one whitespace character and a hyphen, e.g. "B12 -".
_LOT_PATTERN = re.compile(r"[BSM](\d+)\s-")


def get_lot(txt):
    """Return lot number from "RECAPITULATIF DES OPERATIONS".

    The first occurrence of ``B``, ``S`` or ``M`` followed by digits, a
    single whitespace character and a hyphen is located anywhere in
    *txt*; the digits are returned zero-padded to at least two characters.

    Args:
        txt: Text to scan for a lot reference.

    Returns:
        The lot number as a string (``"03"``, ``"12"``, ``"123"``), or
        ``"*"`` when *txt* holds no lot reference or is not a string
        (for example ``None`` or a NaN coming from an empty cell).
    """
    # Non-string values cannot be searched by ``re``; treat them like
    # text without a lot instead of raising TypeError.
    if not isinstance(txt, str):
        return "*"

    # Only the first match matters, so stop at it rather than collecting
    # every occurrence.
    match = _LOT_PATTERN.search(txt)
    if match is None:
        return "*"

    # int() drops leading zeros before re-padding to two digits.
    return "{:02d}".format(int(match.group(1)))