Feat: better extraction of lot

This commit is contained in:
Bertrand Benjamin 2023-06-27 10:23:02 +02:00
parent 18c8282f63
commit ceebfb0a38

View File

@@ -1,3 +1,5 @@
import re
import numpy as np import numpy as np
import pandas as pd import pandas as pd
@@ -13,15 +15,13 @@ def is_it(page_text):
return False return False
def get_lot(x): def get_lot(txt):
"""Return lot number from "RECAPITULATIF DES OPERATIONS" """ """Return lot number from "RECAPITULATIF DES OPERATIONS" """
if x[:2].isdigit(): regex = r"[BSM](\d+)\s-"
return x[:2] result = re.findall(regex, txt)
if x[:1].isdigit(): if result:
return "0" + x[:1] return "{:02d}".format(int(result[0]))
if x[:2] == "PC": return "*"
return "PC"
return ""
def keep_row(row): def keep_row(row):