# Task runner for the data pipeline: PDF extraction, removal of generated
# files, the ingest/feature/datamart Python steps, and Nextcloud sync.

# Root of the local data tree; the other paths below derive from it.
DATA_BASE := ./datas
# Source PDFs. PDF_YEARS lists every entry directly under PDF_BASE
# (globbed once, when the Makefile is parsed).
PDF_BASE := $(DATA_BASE)/pdfs
PDF_YEARS := $(wildcard $(PDF_BASE)/*)
# Raw output tree.
RAW_BASE := $(DATA_BASE)/raw
RAW_CRG := $(RAW_BASE)/CRG
# PDF_YEARS with the PDF_BASE prefix replaced by RAW_CRG.
RAW_CRG_YEARS := $(subst $(PDF_BASE),$(RAW_CRG),$(PDF_YEARS))
# Years processed by extract_pdfs. Override on the command line, e.g.
#   make extract_pdfs EXTRACT_YEARS="2023 2024"
EXTRACT_YEARS ?= 2021 2022 2023 2024

# Every target below names a command, not a file it produces.
.PHONY: extract_pdfs clean_raw clean_built clean_all \
	run_ingest run_feature run_datamart build \
	import_nextcloud push_nextcloud

# Debugging rule: prints what Make sees for one directory under $(RAW_CRG)
# (the wildcard result, a directory listing, the stem, all prerequisites and
# the prerequisites newer than the target).
# NOTE(review): the $(wildcard ...) in the prerequisite list is expanded when
# the Makefile is parsed, with a literal `%`, so it yields no prerequisites
# unless a directory literally named `%` exists. Per-stem prerequisites would
# need .SECONDEXPANSION; left unchanged because this rule only echoes.
$(RAW_CRG)/%/: $(wildcard $(PDF_BASE)/%/*)
	echo $(wildcard $(PDF_BASE)/$*/*)
	@echo ----
	ls $(PDF_BASE)/$*/
	@echo ----
	echo $*
	@echo ----
	echo $^
	@echo ----
	echo $?

# Earlier drafts of a per-year extraction rule, kept for reference only:
#./datas/raw/CRG/%:
#	pdf-oralia extract all --src $$year --dest $$(subst $$PDF_BASE, $$RAW_CRG, $$year)
# $(RAW_CRG_YEARS): $(PDF_PATHS)
#	for year in $(PDF_PATHS); do
#		echo $$year;
#		echo $$(subst $$PDF_BASE, $$RAW_CRG, $$year);
#		echo "----";
#	done;

# For each year in EXTRACT_YEARS: create $(RAW_CRG)/<year>/extracted, run
# `pdf-oralia extract all` from the year's PDF directory into it, then run
# `pdf-oralia join` from that directory into $(RAW_CRG)/<year>/.
# `set -e` makes the recipe stop and fail on the first failing command
# instead of carrying on to the next step or year.
extract_pdfs:
	set -e; \
	for year in $(EXTRACT_YEARS); do \
		mkdir -p "$(RAW_CRG)/$$year/extracted"; \
		pdf-oralia extract all --src "$(PDF_BASE)/$$year/" --dest "$(RAW_CRG)/$$year/extracted"; \
		pdf-oralia join --src "$(RAW_CRG)/$$year/extracted/" --dest "$(RAW_CRG)/$$year/"; \
	done

# Remove raw CSV files.
# The spaces in the path are escaped so that rm receives a single glob;
# unescaped, the shell splits it into three arguments (./PLESNA, Compta and
# SYSTEM/raw/**/*.csv) and `rm -rf` is applied to each of them.
# NOTE(review): the rest of this file keeps raw data under $(RAW_BASE);
# confirm that this directory is really the one to clean.
clean_raw:
	rm -rf ./PLESNA\ Compta\ SYSTEM/raw/**/*.csv

# Remove generated CSV/XLSX files from the staging, gold and datamart trees.
# NOTE(review): recipes run with Make's default /bin/sh here, where `**`
# behaves like `*`, so only files exactly one directory below each tree are
# matched. Use find(1) or bash with globstar if a recursive clean is wanted.
clean_built:
	rm -rf $(DATA_BASE)/staging/**/*.csv
	rm -rf $(DATA_BASE)/gold/**/*.csv
	rm -rf $(DATA_BASE)/datamart/**/*.csv
	rm -rf $(DATA_BASE)/datamart/**/*.xlsx

# Individual pipeline steps, each a sub-command of the `scripts` package.
run_ingest:
	python -m scripts ingest

run_feature:
	python -m scripts feature

run_datamart:
	python -m scripts datamart

# Full rebuild: clean the generated files, then ingest, feature, datamart.
# NOTE(review): the steps run in this order only in a serial build; the
# order is not guaranteed under `make -j`.
build: clean_built run_ingest run_feature run_datamart

# Remove both generated and raw files.
clean_all: clean_built clean_raw

# Copy the Nextcloud "Histoire" folder into the local data tree.
import_nextcloud:
	rsync -av ~/Nextcloud/PLESNA\ Compta\ SYSTEM/Histoire/ $(DATA_BASE)/Histoire

# Copy the local datamart to the Nextcloud "DataMart" folder.
push_nextcloud:
	rsync -av $(DATA_BASE)/datamart/ ~/Nextcloud/PLESNA\ Compta\ SYSTEM/DataMart