2023-10-05 13:10:39 +00:00
|
|
|
import glob
|
2023-12-27 18:58:12 +00:00
|
|
|
import logging
|
2023-10-05 13:10:39 +00:00
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
def join_excel(src, dest, file_pattern):
    """Join every Excel file in ``src`` matching ``file_pattern`` into one file.

    The matching files are read into DataFrames, concatenated in the order
    in which ``list_files`` returns them, and written to ``dest`` without
    the index column.

    Args:
        src: Directory that is searched for input files.
        dest: Path of the Excel file that receives the concatenated rows.
        file_pattern: Glob pattern, relative to ``src``, selecting the inputs.

    Raises:
        ValueError: If no file in ``src`` matches ``file_pattern``.
    """
    filenames = list_files(src, file_pattern)
    logging.debug("Concatenate %s", filenames)

    if not filenames:
        # pd.concat([]) would raise a bare "No objects to concatenate";
        # keep the exception type but name the actual cause.
        raise ValueError(
            f"No files matching {file_pattern!r} found in {src!r}"
        )

    dfs = extract_dfs(filenames)
    joined_df = pd.concat(dfs)

    logging.debug("Writing joined excel to %s", dest)
    joined_df.to_excel(dest, index=False)
    logging.debug("with %s rows", len(joined_df))
|
2023-10-05 13:10:39 +00:00
|
|
|
|
|
|
|
|
|
|
|
def list_files(src, file_glob):
    """Return the paths inside ``src`` that match ``file_glob``, sorted.

    ``src`` is taken literally: glob metacharacters in it (``*``, ``?``,
    ``[``) are escaped so that only ``file_glob`` acts as a pattern.

    Args:
        src: Directory to search.
        file_glob: Glob pattern, relative to ``src``.

    Returns:
        A list of matching paths in ascending lexicographic order; empty
        when nothing matches.
    """
    # Escape the directory part so names such as "export[1]" are not
    # misread as character classes.
    pattern = f"{glob.escape(str(src))}/{file_glob}"
    # glob yields entries in arbitrary order; sort for reproducible output.
    return sorted(glob.glob(pattern))
|
|
|
|
|
|
|
|
|
|
|
|
def extract_dfs(filenames):
    """Read each Excel file in ``filenames`` into a DataFrame.

    Returns the DataFrames as a list, in the same order as ``filenames``.
    """

    def _load(path):
        # Log the file name before reading and the row count afterwards.
        logging.debug(f"Extracting {path}")
        frame = pd.read_excel(path)
        logging.debug(f"Found {len(frame)} rows")
        return frame

    return [_load(path) for path in filenames]
|