Feat: add logging and skip the first page
This commit is contained in:
parent
47c810a024
commit
64dd44488f
@ -1,3 +1,4 @@
|
|||||||
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pdfplumber
|
import pdfplumber
|
||||||
@ -14,11 +15,10 @@ charge_table_settings = {
|
|||||||
def extract_from_pdf(pdf, charge_dest, location_dest):
|
def extract_from_pdf(pdf, charge_dest, location_dest):
|
||||||
"""Build charge_dest and location_dest xlsx file from pdf"""
|
"""Build charge_dest and location_dest xlsx file from pdf"""
|
||||||
loc_table = []
|
loc_table = []
|
||||||
for page in pdf.pages:
|
for page in pdf.pages[1:]:
|
||||||
|
page_text = page.extract_text()
|
||||||
situation_loc_line = [
|
situation_loc_line = [
|
||||||
l
|
l for l in page_text.split("\n") if "SITUATION DES LOCATAIRES" in l
|
||||||
for l in page.extract_text().split("\n")
|
|
||||||
if "SITUATION DES LOCATAIRES" in l
|
|
||||||
]
|
]
|
||||||
if situation_loc_line:
|
if situation_loc_line:
|
||||||
mois, annee = situation_loc_line[0].split(" ")[-2:]
|
mois, annee = situation_loc_line[0].split(" ")[-2:]
|
||||||
@ -27,15 +27,16 @@ def extract_from_pdf(pdf, charge_dest, location_dest):
|
|||||||
else:
|
else:
|
||||||
loc_table = page.extract_table()
|
loc_table = page.extract_table()
|
||||||
|
|
||||||
if "HONORAIRES" in page.extract_text():
|
elif "HONORAIRES" in page_text:
|
||||||
df_charge = extract_charge(page.extract_table(charge_table_settings))
|
table = page.extract_table(charge_table_settings)
|
||||||
|
df_charge = extract_charge(table)
|
||||||
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False)
|
df_charge.to_excel(charge_dest, sheet_name="Charges", index=False)
|
||||||
print(f"{charge_dest} saved")
|
logging.info(f"{charge_dest} saved")
|
||||||
|
|
||||||
df_loc = extract_situation_loc(loc_table, mois=mois, annee=annee)
|
df_loc = extract_situation_loc(loc_table, mois=mois, annee=annee)
|
||||||
df_loc = df_loc.assign()
|
df_loc = df_loc.assign()
|
||||||
df_loc.to_excel(location_dest, sheet_name="Location", index=False)
|
df_loc.to_excel(location_dest, sheet_name="Location", index=False)
|
||||||
print(f"{location_dest} saved")
|
logging.info(f"{location_dest} saved")
|
||||||
|
|
||||||
|
|
||||||
def extract_save(pdf_file, dest):
|
def extract_save(pdf_file, dest):
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
@ -8,15 +10,6 @@ def extract_charge(table):
|
|||||||
pd.DataFrame(table[1:], columns=table[0])
|
pd.DataFrame(table[1:], columns=table[0])
|
||||||
.replace("", np.nan)
|
.replace("", np.nan)
|
||||||
.dropna(subset=["Débits"])
|
.dropna(subset=["Débits"])
|
||||||
.astype(
|
|
||||||
{
|
|
||||||
"Débits": "float64",
|
|
||||||
"Crédits": "float64",
|
|
||||||
"Dont T.V.A.": "float64",
|
|
||||||
"Locatif": "float64",
|
|
||||||
"Déductible": "float64",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
drop_index = df[
|
drop_index = df[
|
||||||
df["RECAPITULATIF DES OPERATIONS"].str.contains("TOTAUX", case=False)
|
df["RECAPITULATIF DES OPERATIONS"].str.contains("TOTAUX", case=False)
|
||||||
@ -30,4 +23,14 @@ def extract_charge(table):
|
|||||||
inplace=True,
|
inplace=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
df = df.astype(
|
||||||
|
{
|
||||||
|
"Débits": "float64",
|
||||||
|
"Crédits": "float64",
|
||||||
|
"Dont T.V.A.": "float64",
|
||||||
|
"Locatif": "float64",
|
||||||
|
"Déductible": "float64",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
return df
|
return df
|
||||||
|
@ -1,9 +1,29 @@
|
|||||||
|
import logging
|
||||||
|
from logging.config import dictConfig
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import click
|
import click
|
||||||
|
|
||||||
from .extract import extract_save
|
from .extract import extract_save
|
||||||
|
|
||||||
|
logging_config = dict(
|
||||||
|
version=1,
|
||||||
|
formatters={"f": {"format": "%(levelname)-8s %(name)-12s %(message)s"}},
|
||||||
|
handlers={
|
||||||
|
"h": {
|
||||||
|
"class": "logging.StreamHandler",
|
||||||
|
"formatter": "f",
|
||||||
|
"level": logging.DEBUG,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
root={
|
||||||
|
"handlers": ["h"],
|
||||||
|
"level": logging.DEBUG,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
dictConfig(logging_config)
|
||||||
|
|
||||||
|
|
||||||
@click.group()
|
@click.group()
|
||||||
def main():
|
def main():
|
||||||
@ -37,6 +57,7 @@ def all(folder, dest):
|
|||||||
|
|
||||||
pdf_files = [x for x in p.iterdir() if ".pdf" in str(x)]
|
pdf_files = [x for x in p.iterdir() if ".pdf" in str(x)]
|
||||||
for pdf_file in pdf_files:
|
for pdf_file in pdf_files:
|
||||||
|
logging.info(f"Found {pdf_file}")
|
||||||
extract_save(pdf_file, d)
|
extract_save(pdf_file, d)
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user