Feat: add logging and skip the first page
This commit is contained in:
parent
47c810a024
commit
64dd44488f
@ -1,3 +1,4 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import pdfplumber
|
||||
@ -14,11 +15,10 @@ charge_table_settings = {
|
||||
def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||
"""Build charge_dest and location_dest xlsx file from pdf"""
|
||||
loc_table = []
|
||||
for page in pdf.pages:
|
||||
for page in pdf.pages[1:]:
|
||||
page_text = page.extract_text()
|
||||
situation_loc_line = [
|
||||
l
|
||||
for l in page.extract_text().split("\n")
|
||||
if "SITUATION DES LOCATAIRES" in l
|
||||
l for l in page_text.split("\n") if "SITUATION DES LOCATAIRES" in l
|
||||
]
|
||||
if situation_loc_line:
|
||||
mois, annee = situation_loc_line[0].split(" ")[-2:]
|
||||
@ -27,15 +27,16 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||
else:
|
||||
loc_table = page.extract_table()
|
||||
|
||||
if "HONORAIRES" in page.extract_text():
|
||||
df_charge = extract_charge(page.extract_table(charge_table_settings))
|
||||
elif "HONORAIRES" in page_text:
|
||||
table = page.extract_table(charge_table_settings)
|
||||
df_charge = extract_charge(table)
|
||||
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False)
|
||||
print(f"{charge_dest} saved")
|
||||
logging.info(f"{charge_dest} saved")
|
||||
|
||||
df_loc = extract_situation_loc(loc_table, mois=mois, annee=annee)
|
||||
df_loc = df_loc.assign()
|
||||
df_loc.to_excel(location_dest, sheet_name="Location", index=False)
|
||||
print(f"{location_dest} saved")
|
||||
logging.info(f"{location_dest} saved")
|
||||
|
||||
|
||||
def extract_save(pdf_file, dest):
|
||||
|
@ -1,3 +1,5 @@
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
@ -8,15 +10,6 @@ def extract_charge(table):
|
||||
pd.DataFrame(table[1:], columns=table[0])
|
||||
.replace("", np.nan)
|
||||
.dropna(subset=["Débits"])
|
||||
.astype(
|
||||
{
|
||||
"Débits": "float64",
|
||||
"Crédits": "float64",
|
||||
"Dont T.V.A.": "float64",
|
||||
"Locatif": "float64",
|
||||
"Déductible": "float64",
|
||||
}
|
||||
)
|
||||
)
|
||||
drop_index = df[
|
||||
df["RECAPITULATIF DES OPERATIONS"].str.contains("TOTAUX", case=False)
|
||||
@ -30,4 +23,14 @@ def extract_charge(table):
|
||||
inplace=True,
|
||||
)
|
||||
|
||||
df = df.astype(
|
||||
{
|
||||
"Débits": "float64",
|
||||
"Crédits": "float64",
|
||||
"Dont T.V.A.": "float64",
|
||||
"Locatif": "float64",
|
||||
"Déductible": "float64",
|
||||
}
|
||||
)
|
||||
|
||||
return df
|
||||
|
@ -1,9 +1,29 @@
|
||||
import logging
|
||||
from logging.config import dictConfig
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
|
||||
from .extract import extract_save
|
||||
|
||||
logging_config = dict(
|
||||
version=1,
|
||||
formatters={"f": {"format": "%(levelname)-8s %(name)-12s %(message)s"}},
|
||||
handlers={
|
||||
"h": {
|
||||
"class": "logging.StreamHandler",
|
||||
"formatter": "f",
|
||||
"level": logging.DEBUG,
|
||||
}
|
||||
},
|
||||
root={
|
||||
"handlers": ["h"],
|
||||
"level": logging.DEBUG,
|
||||
},
|
||||
)
|
||||
|
||||
dictConfig(logging_config)
|
||||
|
||||
|
||||
@click.group()
|
||||
def main():
|
||||
@ -37,6 +57,7 @@ def all(folder, dest):
|
||||
|
||||
pdf_files = [x for x in p.iterdir() if ".pdf" in str(x)]
|
||||
for pdf_file in pdf_files:
|
||||
logging.info(f"Found {pdf_file}")
|
||||
extract_save(pdf_file, d)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user