diff --git a/pdf_oralia/extract.py b/pdf_oralia/extract.py
index d1a7599..a19d587 100644
--- a/pdf_oralia/extract.py
+++ b/pdf_oralia/extract.py
@@ -13,6 +13,10 @@ extract_table_settings = {
 }
 
 
+class ExtractError(Exception):
+    pass
+
+
 def extract_date(page_text):
     """Extract date from a page
 
@@ -123,12 +127,10 @@ def extract_save(pdf_file, dest, save=[]):
     pdf_file = Path(pdf_file)
     xlss = extract_plan(pdf_file, dest)
 
-    if save != []:
-        dfs = from_pdf(pdf_file)
+    dfs = from_pdf(pdf_file)
 
-        for s in save:
-            dfs[s].to_excel(xlss[s], sheet_name=s, index=False)
-            logging.info(f"{xlss[s]} saved")
-        return {k: v for k, v in xlss.items() if k in save}
+    for s in save:
+        dfs[s].to_excel(xlss[s], sheet_name=s, index=False)
+        logging.info(f"{xlss[s]} saved")
 
-    return xlss
+    return {k: v for k, v in xlss.items() if k in save}
diff --git a/pdf_oralia/scripts.py b/pdf_oralia/scripts.py
index f97f41d..02c7ff6 100644
--- a/pdf_oralia/scripts.py
+++ b/pdf_oralia/scripts.py
@@ -62,6 +62,9 @@ def on(pdf_file, dest, force, only_plan):
     else:
         dest = Path(dest)
 
+    # Validate explicitly: an `assert` is stripped when Python runs with -O.
+    if not pdf_file.exists():
+        raise FileNotFoundError(f"{pdf_file} does not exist")
     logging.info(f"Found {pdf_file}")
 
     plan_dest = extract_plan(pdf_file, dest)