Feat: extract "remise commerciale" from 1st page
This commit is contained in:
parent
a06720b93c
commit
8397e359b0
@ -1,9 +1,10 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
import pdfplumber
|
||||
|
||||
from .extract_charge import extract_charge
|
||||
from .extract_charge import extract_charge, extract_remise_com
|
||||
from .extract_locataire import extract_situation_loc
|
||||
|
||||
charge_table_settings = {
|
||||
@ -16,6 +17,11 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||
"""Build charge_dest and location_dest xlsx file from pdf"""
|
||||
loc_tables = []
|
||||
charge_table = []
|
||||
|
||||
df_1st_charge = extract_remise_com(
|
||||
pdf.pages[0].extract_table(charge_table_settings)
|
||||
)
|
||||
|
||||
for page in pdf.pages[1:]:
|
||||
page_text = page.extract_text()
|
||||
situation_loc_line = [
|
||||
@ -35,7 +41,8 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||
charge_table = page.extract_table(charge_table_settings)
|
||||
|
||||
df_charge = extract_charge(charge_table)
|
||||
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False)
|
||||
df_charge_with_1st = pd.concat([df_1st_charge, df_charge])
|
||||
df_charge_with_1st.to_excel(charge_dest, sheet_name="Charges", index=False)
|
||||
logging.info(f"{charge_dest} saved")
|
||||
|
||||
df_loc = extract_situation_loc(loc_tables, mois=mois, annee=annee)
|
||||
|
@ -51,3 +51,13 @@ def extract_charge(table):
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def extract_remise_com(table):
    """Extract the "remise commerciale" rows from the first-page table.

    Args:
        table: Table as a list of rows (each row a list of cell strings),
            where the first row holds the column names. May be ``None`` or
            empty, e.g. when the PDF extraction found no table on the page.

    Returns:
        A DataFrame restricted to the rows whose
        "RECAPITULATIF DES OPERATIONS" cell contains
        "Remise commerciale gérance" (case-insensitive). Empty cells are
        converted to NaN. An empty DataFrame is returned when ``table`` is
        ``None`` or empty.

    Raises:
        KeyError: If the header row has no "RECAPITULATIF DES OPERATIONS"
            column.
    """
    # No table (None) or no header row: there is nothing to extract, and
    # indexing table[0] / slicing table[1:] would raise.
    if not table:
        return pd.DataFrame()

    header, *rows = table
    df = pd.DataFrame(rows, columns=header).replace("", np.nan)

    # na=False makes blank (NaN) labels count as "no match" instead of
    # propagating NaN into the boolean mask.
    is_remise = df["RECAPITULATIF DES OPERATIONS"].str.contains(
        "Remise commerciale gérance", case=False, na=False
    )
    return df[is_remise]
|
||||
|
Loading…
Reference in New Issue
Block a user