main #19
@ -0,0 +1 @@
|
||||
from .extract import from_pdf
|
@ -38,8 +38,9 @@ def catch_malformed_table(tables):
|
||||
return tables[0]
|
||||
|
||||
|
||||
def from_pdf(pdf):
|
||||
def from_pdf(pdf_file):
|
||||
"""Build dataframes one about charges and another on loc"""
|
||||
pdf = pdfplumber.open(pdf_file)
|
||||
recapitulatif_tables = []
|
||||
loc_tables = []
|
||||
charge_tables = []
|
||||
@ -90,8 +91,7 @@ def extract_save(pdf_file, dest):
|
||||
xls_charge = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_charge.xlsx"
|
||||
xls_locataire = Path(dest) / f"{pdf_file.stem.replace(' ', '_')}_locataire.xlsx"
|
||||
|
||||
pdf = pdfplumber.open(pdf_file)
|
||||
df_charge, df_loc = from_pdf(pdf)
|
||||
df_charge, df_loc = from_pdf(pdf_file)
|
||||
|
||||
df_charge.to_excel(xls_charge, sheet_name="Charges", index=False)
|
||||
logging.info(f"{xls_charge} saved")
|
||||
|
2927
poetry.lock
generated
2927
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -13,7 +13,7 @@ pdf-oralia = "pdf_oralia.scripts:main"
|
||||
python = "^3.10"
|
||||
click = "^8.1.3"
|
||||
pdfplumber = "^0.7.4"
|
||||
pandas = "^1.5.0"
|
||||
pandas = "^2.2.3"
|
||||
openpyxl = "^3.0.10"
|
||||
|
||||
|
||||
|
@ -1,76 +1,3 @@
|
||||
argon2-cffi==21.3.0
|
||||
argon2-cffi-bindings==21.2.0
|
||||
asttokens==2.0.8
|
||||
attrs==22.1.0
|
||||
backcall==0.2.0
|
||||
beautifulsoup4==4.11.1
|
||||
bleach==5.0.1
|
||||
cffi==1.15.1
|
||||
charset-normalizer==2.1.1
|
||||
cryptography==38.0.1
|
||||
debugpy==1.6.3
|
||||
decorator==5.1.1
|
||||
defusedxml==0.7.1
|
||||
entrypoints==0.4
|
||||
et-xmlfile==1.1.0
|
||||
executing==1.1.0
|
||||
fastjsonschema==2.16.2
|
||||
ipykernel==6.16.0
|
||||
ipython==8.5.0
|
||||
ipython-genutils==0.2.0
|
||||
ipywidgets==8.0.2
|
||||
jedi==0.18.1
|
||||
Jinja2==3.1.3
|
||||
jsonschema==4.16.0
|
||||
jupyter==1.0.0
|
||||
jupyter-console==6.4.4
|
||||
jupyter-core==4.11.1
|
||||
jupyter_client==7.3.5
|
||||
jupyterlab-pygments==0.2.2
|
||||
jupyterlab-widgets==3.0.3
|
||||
lxml==4.9.1
|
||||
MarkupSafe==2.1.5
|
||||
matplotlib-inline==0.1.6
|
||||
mistune==2.0.4
|
||||
nbclient==0.6.8
|
||||
nbconvert==7.0.0
|
||||
nbformat==5.6.1
|
||||
nest-asyncio==1.5.5
|
||||
notebook==6.4.12
|
||||
numpy==1.23.3
|
||||
openpyxl==3.0.10
|
||||
packaging==21.3
|
||||
pandas==1.5.0
|
||||
pandocfilters==1.5.0
|
||||
parso==0.8.3
|
||||
pdfminer.six==20220524
|
||||
pdfplumber==0.7.4
|
||||
pexpect==4.8.0
|
||||
pickleshare==0.7.5
|
||||
Pillow==9.2.0
|
||||
prometheus-client==0.14.1
|
||||
prompt-toolkit==3.0.31
|
||||
psutil==5.9.2
|
||||
ptyprocess==0.7.0
|
||||
pure-eval==0.2.2
|
||||
pycparser==2.21
|
||||
Pygments==2.13.0
|
||||
pyparsing==3.0.9
|
||||
pyrsistent==0.18.1
|
||||
python-dateutil==2.8.2
|
||||
pytz==2022.2.1
|
||||
pyzmq==24.0.1
|
||||
qtconsole==5.3.2
|
||||
QtPy==2.2.0
|
||||
Send2Trash==1.8.2
|
||||
six==1.16.0
|
||||
soupsieve==2.3.2.post1
|
||||
stack-data==0.5.1
|
||||
terminado==0.15.0
|
||||
tinycss2==1.1.1
|
||||
tornado==6.2
|
||||
traitlets==5.4.0
|
||||
Wand==0.6.10
|
||||
wcwidth==0.2.5
|
||||
webencodings==0.5.1
|
||||
widgetsnbextension==4.0.3
|
||||
pdfplumber
|
||||
numpy
|
||||
pandas
|
||||
|
Loading…
Reference in New Issue
Block a user