recopytex/recopytex/store/filesystem/lib.py

41 lines
1.1 KiB
Python

#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
from pathlib import Path
__all__ = ["list_csvs", "extract_fields"]
def list_csvs(path):
"""list csv files in path
:example:
>>> list_csvs("./example/Tribe1/")
[PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
>>> list_csvs("./example/Tribe1")
[PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
"""
return list(Path(path).glob("*.csv"))
def extract_fields(csv_filename, fields=[], remove_duplicates=True):
"""Extract fields in csv
:param csv_filename: csv filename (with header)
:param fields: list of fields to extract (all fields if empty list - default)
:param remove_duplicates: keep uniques rows (default True)
:example:
>>> extract_fields("./example/Tribe1/210122_DS6.csv", ["Trimestre", "Nom", "Date"])
Trimestre Nom Date
0 1 DS6 22/01/2021
"""
df = pd.read_csv(csv_filename)
if fields:
df = df[fields]
if remove_duplicates:
return df.drop_duplicates()
return df