#!/usr/bin/env python
# encoding: utf-8

import pandas as pd
from pathlib import Path
from unidecode import unidecode

__all__ = ["list_csvs", "extract_fields"]


def list_csvs(path):
    """List the csv files found directly inside a directory.

    Only the directory itself is searched (no recursion). The result is
    sorted so that the order is the same on every platform and filesystem;
    ``Path.glob`` on its own yields entries in an unspecified order.

    :param path: directory to search (str or path-like)
    :return: sorted list of ``Path`` objects matching ``*.csv``

    :example:

    >>> list_csvs("./example/Tribe1/")
    [PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
    >>> list_csvs("./example/Tribe1")
    [PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
    """
    return sorted(Path(path).glob("*.csv"))


def extract_fields(csv_filename, fields=None, remove_duplicates=True):
    """Extract fields from a csv file.

    :param csv_filename: csv filename (with header)
    :param fields: list of fields to extract (all fields if ``None`` or an
        empty list - default)
    :param remove_duplicates: keep unique rows only (default True)
    :return: the data read by ``pandas.read_csv``, restricted to ``fields``
        when given and de-duplicated when requested

    :example:

    >>> extract_fields("./example/Tribe1/210122_DS6.csv", ["Trimestre", "Nom", "Date"])
       Trimestre  Nom        Date
    0          1  DS6  22/01/2021
    """
    df = pd.read_csv(csv_filename)

    # ``None`` replaces the former mutable ``[]`` default; both are falsy and
    # both mean "keep every column".
    if fields:
        df = df[fields]

    if remove_duplicates:
        return df.drop_duplicates()
    return df


def build_id(template, element):
    """Build an id by filling a template with the values of an element.

    The template is formatted with the items of ``element`` as keyword
    arguments, the result is transliterated with ``unidecode`` and every
    space is replaced by an underscore. Keys of ``element`` that the
    template does not reference are ignored.

    :param template: ``str.format`` style template (e.g. ``"{name} {place}"``)
    :param element: mapping providing the values referenced by the template
    :return: the formatted, transliterated id with spaces turned into ``_``
    :raises KeyError: if the template references a key missing from ``element``

    :example:

    >>> element = {"name": "pléà", "place": "here", "foo":"bar"}
    >>> build_id("{name} {place}", element)
    'plea_here'
    """
    return unidecode(template.format(**element)).replace(" ", "_")