Feat: split extract
This commit is contained in:
parent
4031be77c6
commit
e3cc7d18a2
@ -1,62 +1,9 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import click
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
import pdfplumber
|
import pdfplumber
|
||||||
|
|
||||||
|
from .extract_charge import extract_charge
|
||||||
def extract_situation_loc(table):
|
from .extract_locataire import extract_situation_loc
|
||||||
try:
|
|
||||||
df = pd.DataFrame(table[1:], columns=table[0])
|
|
||||||
except IndexError:
|
|
||||||
print(table)
|
|
||||||
rows = []
|
|
||||||
for i, row in df[df["Locataires"] == "Totaux"].iterrows():
|
|
||||||
above_row_loc = df.iloc[i - 1]["Locataires"]
|
|
||||||
up_row = pd.concat(
|
|
||||||
[
|
|
||||||
row,
|
|
||||||
parse_above_loc(above_row_loc),
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
rows.append(up_row)
|
|
||||||
df_cleaned = pd.concat(rows, axis=1).T
|
|
||||||
df_cleaned.drop(["Locataires", "", "Période"], axis=1, inplace=True)
|
|
||||||
return df_cleaned
|
|
||||||
|
|
||||||
|
|
||||||
def parse_above_loc(content):
|
|
||||||
row = {}
|
|
||||||
try:
|
|
||||||
app, loc = content.split("\n")
|
|
||||||
except ValueError:
|
|
||||||
row["lot"] = ""
|
|
||||||
row["type"] = ""
|
|
||||||
row["locataire"] = content
|
|
||||||
|
|
||||||
else:
|
|
||||||
app_ = app.split(" ")
|
|
||||||
row["lot"] = app_[1]
|
|
||||||
row["type"] = " ".join(app_[2:])
|
|
||||||
row["locataire"] = loc
|
|
||||||
return pd.Series(row)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_charge(table):
|
|
||||||
df = (
|
|
||||||
pd.DataFrame(table[1:], columns=table[0])
|
|
||||||
.replace("", np.nan)
|
|
||||||
.dropna(subset=["Débits"])
|
|
||||||
)
|
|
||||||
drop_index = df[
|
|
||||||
df["RECAPITULATIF DES OPERATIONS"].str.contains("TOTAUX", case=False)
|
|
||||||
| df["RECAPITULATIF DES OPERATIONS"].str.contains("solde", case=False)
|
|
||||||
].index
|
|
||||||
df.drop(drop_index, inplace=True)
|
|
||||||
return df
|
|
||||||
|
|
||||||
|
|
||||||
charge_table_settings = {
|
charge_table_settings = {
|
||||||
"vertical_strategy": "lines",
|
"vertical_strategy": "lines",
|
||||||
|
17
pdf_oralia/extract_charge.py
Normal file
17
pdf_oralia/extract_charge.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def extract_charge(table):
    """From pdfplumber table extract the charge dataframe.

    Args:
        table: list of rows (lists of cell values); the first row is the
            header and must contain the "Débits" and
            "RECAPITULATIF DES OPERATIONS" columns.

    Returns:
        DataFrame holding the rows that have a "Débits" value, minus the
        rows whose operation label contains "TOTAUX" or "solde"
        (case-insensitive). Empty-string cells are returned as NaN.
    """
    df = (
        pd.DataFrame(table[1:], columns=table[0])
        .replace("", np.nan)
        .dropna(subset=["Débits"])
    )
    labels = df["RECAPITULATIF DES OPERATIONS"]
    # na=False: a row whose label cell is empty (NaN after the replace above)
    # is a regular row, not a summary line. Without it the mask holds NaN and
    # is no longer a strictly boolean selector.
    is_summary = labels.str.contains("TOTAUX", case=False, na=False) | (
        labels.str.contains("solde", case=False, na=False)
    )
    df.drop(df[is_summary].index, inplace=True)
    return df
|
40
pdf_oralia/extract_locataire.py
Normal file
40
pdf_oralia/extract_locataire.py
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def parse_above_loc(content):
    """Parse the cell above a "Totaux" row into lot, type and locataire.

    Args:
        content: cell text. The parsed form is exactly two lines: a first
            line "<label> <lot> <type ...>" and a second line with the
            tenant name. ``None`` (an empty extracted cell) is treated as an
            empty string.

    Returns:
        pd.Series with the keys "lot", "type" and "locataire". When the cell
        does not hold exactly two lines, "lot" and "type" are empty strings
        and the whole text is stored under "locataire".
    """
    if content is None:
        # Empty cells may come through as None; ``None.split`` would raise.
        content = ""
    try:
        app, loc = content.split("\n")
    except ValueError:
        # Not exactly two lines: keep the raw text as the tenant name.
        return pd.Series({"lot": "", "type": "", "locataire": content})

    # Split on single spaces: words[0] is the leading label, words[1] the
    # lot identifier and the remainder the type.
    words = app.split(" ")
    return pd.Series(
        {
            # A one-word first line has no lot; use "" rather than letting
            # words[1] raise IndexError.
            "lot": words[1] if len(words) > 1 else "",
            "type": " ".join(words[2:]),
            "locataire": loc,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
|
def extract_situation_loc(table):
    """From pdfplumber table extract locataire df.

    Keeps one row per "Totaux" line of the table and appends to it the
    "lot", "type" and "locataire" values parsed (by ``parse_above_loc``)
    from the "Locataires" cell of the row just above.

    Args:
        table: list of rows (lists of cell values); the first row is the
            header and must contain a "Locataires" column. "" and "Période"
            columns are also required whenever a "Totaux" line is present.

    Returns:
        DataFrame with one row per "Totaux" line, without the "Locataires",
        "" and "Période" columns and with "lot", "type" and "locataire"
        added. The frame is empty when the table is empty or contains no
        "Totaux" line.
    """
    if not table:
        # No header row at all. Previously the IndexError branch printed the
        # table and then crashed on the unbound ``df``.
        return pd.DataFrame()

    df = pd.DataFrame(table[1:], columns=table[0])
    dropped = ["Locataires", "", "Période"]

    rows = []
    for i, row in df[df["Locataires"] == "Totaux"].iterrows():
        # df carries the default RangeIndex, so label i is also a position.
        # A "Totaux" line on the very first row has nothing above it: do not
        # let ``i - 1 == -1`` wrap around to the last row of the table.
        above_row_loc = df.iloc[i - 1]["Locataires"] if i > 0 else ""
        rows.append(pd.concat([row, parse_above_loc(above_row_loc)]))

    if not rows:
        # pd.concat raises ValueError on an empty list; return an empty
        # frame exposing the columns a non-empty result would have.
        kept = [col for col in df.columns if col not in dropped]
        return pd.DataFrame(columns=kept + ["lot", "type", "locataire"])

    df_cleaned = pd.concat(rows, axis=1).T
    df_cleaned.drop(dropped, axis=1, inplace=True)
    return df_cleaned
|
Loading…
Reference in New Issue
Block a user