Feat: extract "remise commerciale" from 1st page
This commit is contained in:
parent
a06720b93c
commit
8397e359b0
@ -1,9 +1,10 @@
|
|||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
import pdfplumber
|
import pdfplumber
|
||||||
|
|
||||||
from .extract_charge import extract_charge
|
from .extract_charge import extract_charge, extract_remise_com
|
||||||
from .extract_locataire import extract_situation_loc
|
from .extract_locataire import extract_situation_loc
|
||||||
|
|
||||||
charge_table_settings = {
|
charge_table_settings = {
|
||||||
@ -16,6 +17,11 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
|||||||
"""Build charge_dest and location_dest xlsx file from pdf"""
|
"""Build charge_dest and location_dest xlsx file from pdf"""
|
||||||
loc_tables = []
|
loc_tables = []
|
||||||
charge_table = []
|
charge_table = []
|
||||||
|
|
||||||
|
df_1st_charge = extract_remise_com(
|
||||||
|
pdf.pages[0].extract_table(charge_table_settings)
|
||||||
|
)
|
||||||
|
|
||||||
for page in pdf.pages[1:]:
|
for page in pdf.pages[1:]:
|
||||||
page_text = page.extract_text()
|
page_text = page.extract_text()
|
||||||
situation_loc_line = [
|
situation_loc_line = [
|
||||||
@ -35,7 +41,8 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
|||||||
charge_table = page.extract_table(charge_table_settings)
|
charge_table = page.extract_table(charge_table_settings)
|
||||||
|
|
||||||
df_charge = extract_charge(charge_table)
|
df_charge = extract_charge(charge_table)
|
||||||
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False)
|
df_charge_with_1st = pd.concat([df_1st_charge, df_charge])
|
||||||
|
df_charge_with_1st.to_excel(charge_dest, sheet_name="Charges", index=False)
|
||||||
logging.info(f"{charge_dest} saved")
|
logging.info(f"{charge_dest} saved")
|
||||||
|
|
||||||
df_loc = extract_situation_loc(loc_tables, mois=mois, annee=annee)
|
df_loc = extract_situation_loc(loc_tables, mois=mois, annee=annee)
|
||||||
|
@ -51,3 +51,13 @@ def extract_charge(table):
|
|||||||
)
|
)
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def extract_remise_com(table):
|
||||||
|
"""Extract "remise commercial" from first page"""
|
||||||
|
df = pd.DataFrame(table[1:], columns=table[0]).replace("", np.nan)
|
||||||
|
return df[
|
||||||
|
df["RECAPITULATIF DES OPERATIONS"].str.contains(
|
||||||
|
"Remise commerciale gérance", case=False, na=False
|
||||||
|
)
|
||||||
|
]
|
||||||
|
Loading…
Reference in New Issue
Block a user