pdf_auralia/pdf_oralia/join.py

30 lines
810 B
Python
Raw Permalink Normal View History

2023-10-05 13:10:39 +00:00
import glob
2023-12-27 18:58:12 +00:00
import logging
2023-10-05 13:10:39 +00:00
import pandas as pd
def join_excel(src, dest, file_pattern):
    """Join every Excel file in ``src`` matching ``file_pattern`` into one file.

    The matching files are read, concatenated row-wise with ``pd.concat``,
    and written to ``dest`` without the DataFrame index.

    Args:
        src: Directory in which to look for Excel files.
        dest: Path of the Excel file to write.
        file_pattern: Glob pattern, relative to ``src``, selecting the files.

    Raises:
        ValueError: If no file in ``src`` matches ``file_pattern``.
    """
    filenames = list_files(src, file_pattern)
    logging.debug(f"Concatenate {filenames}")
    if not filenames:
        # pd.concat([]) would raise a bare "No objects to concatenate";
        # fail with the same exception type but say what was searched.
        raise ValueError(
            f"No file matching {file_pattern!r} found in {src!r}"
        )
    dfs = extract_dfs(filenames)
    joined_df = pd.concat(dfs)
    logging.debug(f"Writing joined excel to {dest}")
    joined_df.to_excel(dest, index=False)
    logging.debug(f"with {len(joined_df)} rows")
2023-10-05 13:10:39 +00:00
def list_files(src, file_glob):
    """Return the paths in ``src`` matching ``file_glob``, sorted by name.

    ``glob`` yields matches in arbitrary, filesystem-dependent order, so the
    result is sorted to make downstream processing reproducible.

    Args:
        src: Directory to search.
        file_glob: Glob pattern appended to ``src`` with a ``/`` separator.

    Returns:
        A sorted list of matching paths (empty if nothing matches).
    """
    return sorted(glob.iglob(f"{src}/{file_glob}"))
def extract_dfs(filenames):
    """Read each Excel file in ``filenames`` into a DataFrame.

    Args:
        filenames: Iterable of paths passed one by one to ``pd.read_excel``.

    Returns:
        A list of DataFrames, in the same order as ``filenames``.
    """

    def _load(path):
        # Log before and after reading so each file's row count is traceable.
        logging.debug(f"Extracting {path}")
        frame = pd.read_excel(path)
        logging.debug(f"Found {len(frame)} rows")
        return frame

    return [_load(path) for path in filenames]