# Makefile: PDF extraction, data build steps and Nextcloud synchronisation.
# Root directory of all pipeline data. Can be overridden on the command line,
# e.g. `make DATA_BASE=/some/dir <target>`.
DATA_BASE := ./datas

# Directory holding the source PDFs and the list of entries directly under it.
# Simple assignment (:=) evaluates the glob once, when the makefile is read,
# instead of on every expansion of the variable.
PDF_BASE := $(DATA_BASE)/pdfs
PDF_YEARS := $(wildcard $(PDF_BASE)/*)

# Directories receiving the raw extraction output.
RAW_BASE := $(DATA_BASE)/raw
RAW_CRG := $(RAW_BASE)/CRG

# Each entry of PDF_YEARS with its PDF_BASE prefix swapped for RAW_CRG.
# patsubst rewrites only the leading prefix of each word. No spaces follow the
# commas, because make keeps whitespace after a comma as part of the argument.
RAW_CRG_YEARS := $(patsubst $(PDF_BASE)/%,$(RAW_CRG)/%,$(PDF_YEARS))
|
|
|
|
|
|
# Debug rule for one directory under RAW_CRG. It prints, in order: the files
# found in the matching PDF directory, an `ls` of that directory, the pattern
# stem ($*), all prerequisites ($^) and the prerequisites newer than the
# target ($?).
#
# The prerequisite list depends on the stem, which is only known once the
# pattern has matched. A plain $(wildcard $(PDF_BASE)/%/*) is expanded while
# the makefile is read, with a literal "%" in the path, so it matches nothing
# and the rule ends up with no prerequisites. Secondary expansion defers the
# wildcard until the stem is available as $$*.
#
# NOTE: .SECONDEXPANSION applies to every rule defined after it in this file.
# Rules whose prerequisite lists contain no "$$" are not affected.
.SECONDEXPANSION:
$(RAW_CRG)/%/: $$(wildcard $(PDF_BASE)/$$*/*)
	echo $(wildcard $(PDF_BASE)/$*/*)
	@echo ----
	ls $(PDF_BASE)/$*/
	@echo ----
	echo $*
	@echo ----
	echo $^
	@echo ----
	echo $?
|
|
|
|
# --- Disabled drafts of the extraction rule, kept for reference (not executed) ---
#./datas/raw/CRG/%:
#pdf-oralia extract all --src $$year --dest $$(subst $$PDF_BASE, $$RAW_CRG, $$year)
# $(RAW_CRG_YEARS): $(PDF_PATHS)
# for year in $(PDF_PATHS); do \
# echo $$year; \
# echo $$(subst $$PDF_BASE, $$RAW_CRG, $$year); \
# echo "----"; \
# done;
|
|
|
|
# Years handled by extract_pdfs. Override on the command line to process a
# subset, e.g. `make extract_pdfs EXTRACT_YEARS=2024`.
EXTRACT_YEARS ?= 2021 2022 2023 2024

# For each year: create $(RAW_CRG)/<year>/extracted, run `pdf-oralia extract all`
# from $(PDF_BASE)/<year>/ into it, then run `pdf-oralia join` from that
# directory into $(RAW_CRG)/<year>/.
#
# `set -e` aborts the whole loop (and fails the target) as soon as one command
# fails. With plain `;` separators only the last command's status would reach
# make, so a failed extraction would go unnoticed.
.PHONY: extract_pdfs
extract_pdfs:
	set -e; \
	for year in $(EXTRACT_YEARS); do \
		mkdir -p "$(RAW_CRG)/$$year/extracted"; \
		pdf-oralia extract all --src "$(PDF_BASE)/$$year/" --dest "$(RAW_CRG)/$$year/extracted"; \
		pdf-oralia join --src "$(RAW_CRG)/$$year/extracted/" --dest "$(RAW_CRG)/$$year/"; \
	done
|
|
|
|
# Delete every CSV file below "./PLESNA Compta SYSTEM/raw".
#
# The directory name contains spaces and must be quoted. Unquoted, the shell
# splits it and `rm -rf` receives "./PLESNA" and "Compta" as separate paths to
# delete. find replaces the `**` glob, which is only recursive in shells with
# globstar enabled and not in make's default /bin/sh. The -d guard keeps the
# target a silent no-op when the directory does not exist.
#
# NOTE(review): this path differs from RAW_BASE (DATA_BASE/raw), which is where
# extract_pdfs writes. Confirm which tree this target is meant to clean.
.PHONY: clean_raw
clean_raw:
	if [ -d "./PLESNA Compta SYSTEM/raw" ]; then \
		find "./PLESNA Compta SYSTEM/raw" -type f -name '*.csv' -exec rm -f {} + ; \
	fi
|
|
|
|
# Delete generated files from the build output trees: *.csv below staging/,
# gold/ and datamart/, plus *.xlsx below datamart/.
#
# find is used instead of `**/*.csv` because `**` is only recursive in shells
# with globstar enabled. Under make's default /bin/sh it behaves like a single
# `*` and only reaches one directory level. Missing directories are skipped so
# the target also succeeds on a fresh checkout.
.PHONY: clean_built
clean_built:
	for dir in staging gold datamart; do \
		if [ -d "$(DATA_BASE)/$$dir" ]; then \
			find "$(DATA_BASE)/$$dir" -type f -name '*.csv' -exec rm -f {} + ; \
		fi; \
	done
	if [ -d "$(DATA_BASE)/datamart" ]; then \
		find "$(DATA_BASE)/datamart" -type f -name '*.xlsx' -exec rm -f {} + ; \
	fi
|
|
|
|
# Run the `ingest` command of the project's `scripts` Python package.
# Declared phony: this is a command, not a file to be brought up to date.
.PHONY: run_ingest
run_ingest:
	python -m scripts ingest
|
|
|
|
# Run the `feature` command of the project's `scripts` Python package.
# Declared phony: this is a command, not a file to be brought up to date.
.PHONY: run_feature
run_feature:
	python -m scripts feature
|
|
|
|
# Run the `datamart` command of the project's `scripts` Python package.
# Declared phony: this is a command, not a file to be brought up to date.
.PHONY: run_datamart
run_datamart:
	python -m scripts datamart
|
|
|
|
# Full rebuild: clean the built outputs, then run ingest, feature and datamart.
# Declared phony so that a file or directory named `build` cannot make this
# target look like a real file.
# NOTE: the steps run in the listed order only in a serial build; `make -j`
# may start them concurrently.
.PHONY: build
build: clean_built run_ingest run_feature run_datamart
|
|
|
|
# Run both cleaning targets (built outputs and raw extraction output).
# Declared phony: this is a command, not a file to be brought up to date.
.PHONY: clean_all
clean_all: clean_built clean_raw
|
|
|
|
# Copy the contents of the Nextcloud "Histoire" folder into $(DATA_BASE)/Histoire.
# The destination goes through DATA_BASE (default ./datas), like the other
# targets, instead of repeating the literal path.
.PHONY: import_nextcloud
import_nextcloud:
	rsync -av ~/Nextcloud/PLESNA\ Compta\ SYSTEM/Histoire/ "$(DATA_BASE)/Histoire"
|
|
|
|
# Copy the contents of $(DATA_BASE)/datamart into the Nextcloud "DataMart" folder.
# The source goes through DATA_BASE (default ./datas), like the other targets,
# instead of repeating the literal path.
.PHONY: push_nextcloud
push_nextcloud:
	rsync -av "$(DATA_BASE)/datamart/" ~/Nextcloud/PLESNA\ Compta\ SYSTEM/DataMart
|
|
|
|
|