Feat: add destination in flux definition
This commit is contained in:
parent
25ede1789a
commit
d0961b0909
@ -36,14 +36,20 @@ class CSVSource(Source):
|
||||
return pd.read_csv(filepath, **self.options)
|
||||
|
||||
|
||||
# Output target of a Flux. consume_fluxes joins `name` onto the destination
# directory to build the base name of the written file(s).
class Destination(BaseModel):
|
||||
# Base name (no directory) used for the output file.
name: str
|
||||
|
||||
|
||||
# Declarative description of one data flow: the sources to read, the
# transformation to apply, and where the result is written.
class Flux(BaseModel):
|
||||
# Inputs of the flux.
sources: list[Source]
|
||||
# Called by consume_fluxes as transformation(src_df, **extra_kwrds).
transformation: Callable
|
||||
# Extra keyword arguments forwarded to `transformation`.
extra_kwrds: dict = {}
|
||||
# Destination whose `name` forms the output file's base name.
destination: Destination
|
||||
# Column to split the output on: when non-empty, one file is written per
# unique value of that column; when empty, a single file is written.
split_destination: str = ""
|
||||
|
||||
|
||||
def to_csv(df, dest_basename):
|
||||
dest = dest_basename.parent / (dest_basename.name + ".csv")
|
||||
def to_csv(df, dest_basename: Path) -> Path:
|
||||
dest = dest_basename.parent / (dest_basename.stem + ".csv")
|
||||
if dest.exists():
|
||||
df.to_csv(dest, mode="a", header=False, index=False)
|
||||
else:
|
||||
@ -52,14 +58,14 @@ def to_csv(df, dest_basename):
|
||||
|
||||
|
||||
def write_split_by(
|
||||
df: pd.DataFrame, column: str, dest_path: Path, writing_func
|
||||
df: pd.DataFrame, column: str, dest_path: Path, name: str, writing_func
|
||||
) -> list[Path]:
|
||||
wrote_files = []
|
||||
|
||||
for col_value in df[column].unique():
|
||||
filtered_df = df[df[column] == col_value]
|
||||
|
||||
dest_basename = dest_path / f"{col_value}"
|
||||
dest_basename = dest_path / f"{name}-{col_value}"
|
||||
dest = writing_func(filtered_df, dest_basename)
|
||||
wrote_files.append(dest)
|
||||
|
||||
@ -89,7 +95,10 @@ def split_duplicates(
|
||||
|
||||
|
||||
def consume_fluxes(
|
||||
fluxes: dict[str, Flux], origin_path: Path, dest_path: Path, writing_func=to_csv
|
||||
fluxes: dict[str, Flux],
|
||||
origin_path: Path,
|
||||
dest_path: Path,
|
||||
writing_func=to_csv,
|
||||
):
|
||||
duplicated = {}
|
||||
wrote_files = []
|
||||
@ -104,6 +113,16 @@ def consume_fluxes(
|
||||
|
||||
df = flux.transformation(src_df, **flux.extra_kwrds)
|
||||
|
||||
files = write_split_by(df, "Année", dest_path, writing_func)
|
||||
if flux.split_destination:
|
||||
files = write_split_by(
|
||||
df=df,
|
||||
column=flux.split_destination,
|
||||
dest_path=dest_path,
|
||||
name=flux.destination.name,
|
||||
writing_func=writing_func,
|
||||
)
|
||||
else:
|
||||
dest_basename = dest_path / flux.destination.name
|
||||
files = [writing_func(df, dest_basename)]
|
||||
wrote_files += files
|
||||
return wrote_files
|
||||
|
@ -5,7 +5,7 @@ import pandas as pd
|
||||
|
||||
from scripts.flux import consume_fluxes
|
||||
|
||||
from .flux import ExcelSource, Flux
|
||||
from .flux import Destination, ExcelSource, Flux
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
@ -126,6 +126,8 @@ FLUXES = {
|
||||
],
|
||||
transformation=trans_2017_2021,
|
||||
extra_kwrds={"stagging_columns": STAGGING_COLUMNS},
|
||||
destination=Destination(name="crg"),
|
||||
split_destination="Année",
|
||||
),
|
||||
"2022 - charge.xlsx": Flux(
|
||||
sources=[
|
||||
@ -133,6 +135,8 @@ FLUXES = {
|
||||
],
|
||||
transformation=trans_2022_charge,
|
||||
extra_kwrds={"stagging_columns": STAGGING_COLUMNS},
|
||||
destination=Destination(name="crg"),
|
||||
split_destination="Année",
|
||||
),
|
||||
"2022 - locataire.xlsx": Flux(
|
||||
sources=[
|
||||
@ -140,6 +144,8 @@ FLUXES = {
|
||||
],
|
||||
transformation=trans_2022_loc,
|
||||
extra_kwrds={"stagging_columns": STAGGING_COLUMNS},
|
||||
destination=Destination(name="crg"),
|
||||
split_destination="Année",
|
||||
),
|
||||
"2023 - charge et locataire.xlsx": Flux(
|
||||
sources=[
|
||||
@ -150,6 +156,8 @@ FLUXES = {
|
||||
],
|
||||
transformation=trans_2023,
|
||||
extra_kwrds={"year": 2023, "stagging_columns": STAGGING_COLUMNS},
|
||||
destination=Destination(name="crg"),
|
||||
split_destination="Année",
|
||||
),
|
||||
}
|
||||
|
||||
@ -166,5 +174,9 @@ if __name__ == "__main__":
|
||||
staging_crg_path = staging_path / "CRG"
|
||||
assert staging_crg_path.exists()
|
||||
|
||||
crg_files = consume_fluxes(FLUXES, history_crg_path, staging_crg_path)
|
||||
crg_files = consume_fluxes(
|
||||
fluxes=FLUXES,
|
||||
origin_path=history_crg_path,
|
||||
dest_path=staging_crg_path,
|
||||
)
|
||||
print(crg_files)
|
||||
|
@ -4,7 +4,7 @@ from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from scripts.flux import CSVSource, Flux, consume_fluxes
|
||||
from scripts.flux import CSVSource, Destination, Flux, consume_fluxes
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.DEBUG)
|
||||
@ -38,10 +38,11 @@ def build_crg_fluxes(
|
||||
crg_path: Path, pattern: str, transformation: Callable, csv_options: dict = {}
|
||||
) -> dict[str, Flux]:
|
||||
fluxes = {}
|
||||
for crg in crg_path.glob(pattern):
|
||||
fluxes[f"CRG - {crg}"] = Flux(
|
||||
sources=[CSVSource(filename=crg.name, options=csv_options)],
|
||||
for file in crg_path.glob(pattern):
|
||||
fluxes[f"CRG - {file.name}"] = Flux(
|
||||
sources=[CSVSource(filename=file.name, options=csv_options)],
|
||||
transformation=transformation,
|
||||
destination=Destination(name=file.name),
|
||||
)
|
||||
return fluxes
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user