53 lines
1.4 KiB
Python
53 lines
1.4 KiB
Python
#!/usr/bin/env python
|
|
# encoding: utf-8
|
|
|
|
import pandas as pd
|
|
from pathlib import Path
|
|
from unidecode import unidecode
|
|
|
|
|
|
# Names exported by a star-import of this module.
__all__ = ["list_csvs", "extract_fields"]
|
|
|
|
|
|
def list_csvs(path):
    """List the csv files located directly inside a directory.

    Only entries matching ``*.csv`` at the top level of ``path`` are
    returned; sub-directories are not searched.

    :param path: directory to look into (string or path-like)
    :return: sorted list of ``pathlib.Path`` objects, one per csv file

    :example:

    >>> list_csvs("./example/Tribe1/")
    [PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
    >>> list_csvs("./example/Tribe1")
    [PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
    """
    # glob() yields entries in whatever order the filesystem reports them;
    # sorting makes the result reproducible across machines and runs.
    return sorted(Path(path).glob("*.csv"))
|
|
|
|
|
|
def extract_fields(csv_filename, fields=None, remove_duplicates=True):
    """Extract fields from a csv file.

    :param csv_filename: csv filename (with header); handed to ``pandas.read_csv``
    :param fields: iterable of field names to extract, in the wanted order
        (all fields if ``None`` or empty - default). A plain string is passed
        to pandas unchanged.
    :param remove_duplicates: keep unique rows only (default True)
    :return: the selected data, as returned by pandas

    :example:

    >>> extract_fields("./example/Tribe1/210122_DS6.csv", ["Trimestre", "Nom", "Date"])
       Trimestre  Nom        Date
    0          1  DS6  22/01/2021
    """
    df = pd.read_csv(csv_filename)

    # Normalise any non-string iterable (tuple, Index, generator, ...) to a
    # list: pandas reads a tuple as ONE column key, and array-likes cannot be
    # truth-tested by the `if fields` check below.
    if fields is not None and not isinstance(fields, str):
        fields = list(fields)

    if fields:
        df = df[fields]

    if remove_duplicates:
        return df.drop_duplicates()
    return df
|
|
|
|
|
|
def build_id(template, element):
    """Build an identifier by filling ``template`` with values from ``element``.

    The template is rendered with ``str.format`` using the entries of
    ``element`` as keyword arguments, the result is passed through
    ``unidecode``, and every space is then replaced by an underscore.

    :param template: format string whose placeholders name keys of ``element``
    :param element: mapping providing the values; extra keys are ignored

    :example:

    >>> element = {"name": "pléà", "place": "here", "foo":"bar"}
    >>> build_id("{name} {place}", element)
    'plea_here'
    """
    rendered = template.format(**element)
    transliterated = unidecode(rendered)
    # Splitting on the single-space separator and re-joining with "_" swaps
    # each space for an underscore, one for one.
    return "_".join(transliterated.split(" "))
|