#!/usr/bin/env python
# encoding: utf-8

import os
import uuid
from pathlib import Path

import pandas as pd
import yaml

from .. import Loader
from .lib import list_csvs, extract_fields, build_id

DEFAULT_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_config.yml")

# The default config ships next to this module and contains non-ASCII text
# (competence names), so it is read explicitly as UTF-8 instead of relying on
# the platform default encoding.
with open(DEFAULT_CONFIG_FILE, "r", encoding="utf-8") as config:
    DEFAULT_CONFIG = yaml.load(config, Loader=yaml.FullLoader)


def maybe_dataframe(datas):
    """Return the rows of *datas* as a list when it behaves like a dataframe.

    :param datas: a dataframe (anything exposing ``iterrows``) or any other
        object
    :return: the list of rows (second item of each ``iterrows`` pair) when
        ``datas`` has an ``iterrows`` method, otherwise ``datas`` unchanged
    """
    try:
        rows = datas.iterrows()
    except AttributeError:
        # Not a dataframe: hand it back untouched (typically already a list).
        return datas
    return [row for _, row in rows]


class CSVLoader(Loader):

    """Loader when scores and metadatas are stored in csv files

    :config:

    :example:
    >>> loader = CSVLoader()
    >>> loader.get_config()
    {'source': './', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'scores': {'BAD': {'value': 0, 'numeric_value': 0, 'color': '#E7472B', 'comment': 'Faux'}, 'FEW': {'value': 1, 'numeric_value': 1, 'color': '#FF712B', 'comment': 'Peu juste'}, 'NEARLY': {'value': 2, 'numeric_value': 2, 'color': '#F2EC4C', 'comment': 'Presque juste'}, 'GOOD': {'value': 3, 'numeric_value': 3, 'color': '#68D42F', 'comment': 'Juste'}, 'NOTFILLED': {'value': '', 'numeric_value': 'None', 'color': 'white', 'comment': 'En attente'}, 'NOANSWER': {'value': '.', 'numeric_value': 0, 'color': 'black', 'comment': 'Pas de réponse'}, 'ABS': {'value': 'a', 'numeric_value': 'None', 'color': 'lightgray', 'comment': 'Non noté'}}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'id_templates': {'exam': '{name}_{tribe}', 'question': '{exam_id}_{exercise}_{question}_{comment}'}}

    >>> loader = CSVLoader("./test_config.yml")
    >>> loader.get_config()
    {'source': './example', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'scores': {'BAD': {'value': 0, 'numeric_value': 0, 'color': '#E7472B', 'comment': 'Faux'}, 'FEW': {'value': 1, 'numeric_value': 1, 'color': '#FF712B', 'comment': 'Peu juste'}, 'NEARLY': {'value': 2, 'numeric_value': 2, 'color': '#F2EC4C', 'comment': 'Presque juste'}, 'GOOD': {'value': 3, 'numeric_value': 3, 'color': '#68D42F', 'comment': 'Juste'}, 'NOTFILLED': {'value': '', 'numeric_value': 'None', 'color': 'white', 'comment': 'En attente'}, 'NOANSWER': {'value': '.', 'numeric_value': 0, 'color': 'black', 'comment': 'Pas de réponse'}, 'ABS': {'value': 'a', 'numeric_value': 'None', 'color': 'lightgray', 'comment': 'Non noté'}}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'id_templates': {'exam': '{name}_{tribe}', 'question': '{exam_id}_{exercise}_{question}_{comment}'}, 'output': './output', 'templates': 'templates/', 'tribes': {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}}
    """

    # Default configuration, loaded from ``default_config.yml`` at import time.
    # NOTE(review): ``self._config`` is not assigned in this module; it is
    # presumably populated by the ``Loader`` base class -- confirm there.
    CONFIG = DEFAULT_CONFIG

    def get_config(self):
        """ Get config """
        return self._config

    @property
    def exam_columns(self):
        """Columns of the dataframes returned by `get_exams`"""
        return pd.Index(["name", "date", "term", "origin", "tribe", "id"])

    @property
    def question_columns(self):
        """Columns of the dataframes returned by `get_exam_questions`"""
        return pd.Index(
            [
                "exercise",
                "question",
                "competence",
                "theme",
                "comment",
                "score_rate",
                "is_leveled",
                "origin",
                "exam_id",
                "id",
            ]
        )

    @property
    def score_columns(self):
        """Columns of the dataframes returned by `get_questions_scores`"""
        return pd.Index(
            [
                "term",
                "exam",
                "date",
                "exercise",
                "question",
                "competence",
                "theme",
                "comment",
                "score_rate",
                "is_leveled",
                "origin",
                "exam_id",
                "question_id",
                "student_name",
                "score",
            ]
        )

    def rename_columns(self, dataframe):
        """Rename dataframe column to match with `csv_fields`

        Columns named after a value of the ``csv_fields`` config entry are
        renamed to the corresponding key; other columns are left untouched.

        :param dataframe: the dataframe
        :return: a renamed copy of the dataframe

        :example:
        >>> loader = CSVLoader()
        >>> df = pd.DataFrame(columns=["Nom", "Date", "other"])
        >>> list(loader.rename_columns(df).columns)
        ['exam', 'date', 'other']
        """
        csv_to_field = {
            csv_name: field
            for field, csv_name in self._config["csv_fields"].items()
        }
        return dataframe.rename(columns=csv_to_field)

    def reverse_csv_field(self, keys):
        """Reverse csv field from keys

        :param keys: iterable of keys of the ``csv_fields`` config entry
        :return: list of the csv column names configured for those keys
        :raises KeyError: when a key is missing from ``csv_fields``
        """
        csv_fields = self._config["csv_fields"]
        return [csv_fields[k] for k in keys]

    def get_tribes(self, only_names=False):
        """Get tribes list

        :param only_names: return only the list of tribe names
        :return: the ``tribes`` config mapping, or the list of its keys when
            ``only_names`` is true

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> loader.get_tribes()
        {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}
        >>> loader.get_tribes(only_names=True)
        ['Tribe1', 'Tribe2']
        """
        tribes = self._config["tribes"]
        if only_names:
            return list(tribes)
        return tribes

    def get_exams(self, tribes=None):
        """Get exams list

        :param tribes: get only exams for those tribes (tribe names, each
            one being a sub-directory of the configured ``source``); ``None``
            means no tribe
        :return: dataframe of exams (empty, with `exam_columns`, when nothing
            is found)

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> all(exams.columns == loader.exam_columns)
        True
        >>> exams  # doctest: +NORMALIZE_WHITESPACE
           name        date  term                         origin   tribe          id
        0    DS  12/01/2021     1   example/Tribe1/210112_DS.csv  Tribe1   DS_Tribe1
        0   DS6  22/01/2021     1  example/Tribe1/210122_DS6.csv  Tribe1  DS6_Tribe1
        """
        if not tribes:
            return pd.DataFrame(columns=self.exam_columns)

        # Same csv columns for every file: resolve them once.
        fields = self.reverse_csv_field(["exam", "date", "term"])
        id_template = self._config["id_templates"]["exam"]
        source = Path(self._config["source"])

        exams = []
        for tribe in tribes:
            for csv in list_csvs(source / tribe):
                exam = extract_fields(csv, fields)
                exam = self.rename_columns(exam)
                exam = exam.rename(columns={"exam": "name"})
                exam["origin"] = str(csv)
                exam["tribe"] = tribe
                # The id is built from the first row and assigned to the
                # whole frame.
                exam["id"] = build_id(id_template, exam.iloc[0])
                exams.append(exam)

        if exams:
            return pd.concat(exams)
        return pd.DataFrame(columns=self.exam_columns)

    def get_exam_questions(self, exams=None):
        """Get questions for exams stored in score_files

        :param exams: list or dataframe of exams metadatas (need origin field
            to find the csv); ``None`` means no exam
        :return: dataframe of questions (empty, with `question_columns`, when
            no exam is given)

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> all(loader.get_exam_questions([exams.iloc[0]]).columns == loader.question_columns)
        True
        >>> questions = loader.get_exam_questions(exams)
        >>> questions.iloc[0]  # doctest: +NORMALIZE_WHITESPACE
        exercise                          Exercice 1
        question                                   1
        competence                          Calculer
        theme                                   Plop
        comment                               Coucou
        score_rate                               1.0
        is_leveled                               1.0
        origin          example/Tribe1/210112_DS.csv
        exam_id                            DS_Tribe1
        id             DS_Tribe1_Exercice_1_1_Coucou
        Name: 0, dtype: object
        """
        _exams = maybe_dataframe([] if exams is None else exams)

        # Same csv columns for every exam: resolve them once.
        fields = self.reverse_csv_field(
            [
                "exercise",
                "question",
                "competence",
                "theme",
                "comment",
                "score_rate",
                "is_leveled",
            ]
        )
        id_template = self._config["id_templates"]["question"]

        questions = []
        for exam in _exams:
            question = extract_fields(exam["origin"], fields)
            question = self.rename_columns(question)
            question["origin"] = exam["origin"]
            question["exam_id"] = exam["id"]
            question["id"] = build_id(id_template, question.iloc[0])
            questions.append(question)

        if not questions:
            # pd.concat refuses an empty list; mirror get_exams instead.
            return pd.DataFrame(columns=self.question_columns)
        return pd.concat(questions)

    def get_questions_scores(self, questions=None):
        """Get scores of those questions

        :param questions: list or dataframe of questions metadatas (need
            origin field to find the csv); ``None`` means no question
        :return: dataframe with one row per (question, student) pair (empty,
            with `score_columns`, when no question is given)

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> questions = loader.get_exam_questions(exams)
        >>> scores = loader.get_questions_scores(questions)
        >>> all(scores.columns == loader.score_columns)
        True
        >>> scores["student_name"].unique()  # doctest: +NORMALIZE_WHITESPACE
        array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle',
               'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves',
               'Erick Going', 'Ase Praton', 'Rollins Planks', 'Dunstan Sarjant',
               'Stacy Guiton', 'Ange Stanes', 'Amabelle Elleton',
               'Darn Broomhall', 'Dyan Chatto', 'Keane Rennebach',
               'Nari Paulton', 'Brandy Wase', 'Jaclyn Firidolfi',
               'Violette Lockney'], dtype=object)
        """
        if questions is None:
            questions = []
        if not isinstance(questions, pd.DataFrame):
            # A list of rows was given: rebuild a frame so it can be grouped;
            # infer_objects restores numeric dtypes lost in object rows.
            questions = pd.DataFrame(list(questions)).infer_objects()
        if questions.empty:
            return pd.DataFrame(columns=self.score_columns)

        csv_fields = self._config["csv_fields"]
        scores = []
        for origin, questions_df in questions.groupby("origin"):
            scores_df = pd.read_csv(origin)
            scores_df = self.rename_columns(scores_df)
            # Every column which is not a configured field is treated as a
            # student's score column.
            student_names = [c for c in scores_df.columns if c not in csv_fields]
            common_columns = [
                c for c in questions_df.columns if c in scores_df.columns
            ]
            scores_df = pd.merge(scores_df, questions_df, on=common_columns)

            students = set(student_names)
            kept_columns = [c for c in scores_df if c not in students]
            # One row per (question, student) instead of one column per
            # student.
            scores_df = pd.melt(
                scores_df,
                id_vars=kept_columns,
                value_vars=student_names,
                var_name="student_name",
                value_name="score",
            )
            scores_df = scores_df.rename(columns={"id": "question_id"})
            scores.append(scores_df)

        return pd.concat(scores)

    def get_exam_scores(self, exams=None):
        """Get scores for all question of the exam

        :param exams: list or dataframe of exams metadatas (need origin field
            to find the csv); ``None`` means no exam
        :return: dataframe of scores, as built by `get_questions_scores`

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> scores = loader.get_exam_scores(exams)
        >>> scores.columns  # doctest: +NORMALIZE_WHITESPACE
        Index(['term', 'exam', 'date', 'exercise', 'question', 'competence', 'theme',
               'comment', 'score_rate', 'is_leveled', 'origin', 'exam_id',
               'question_id', 'student_name', 'score'],
              dtype='object')
        """
        questions = self.get_exam_questions(exams)
        return self.get_questions_scores(questions)

    def get_students(self, tribes=None):
        """Get student list

        :param tribes: concerned tribes (tribe config mappings, with at least
            the ``students`` and ``name`` keys); ``None`` means no tribe
        :return: dataframe of students, with the csv columns plus ``origin``
            and ``tribe`` (only those two columns when no tribe is given)

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> tribes = loader.get_tribes()
        >>> students = loader.get_students([tribes["Tribe1"]])
        >>> students.columns
        Index(['Nom', 'email', 'origin', 'tribe'], dtype='object')
        """
        source = Path(self._config["source"])

        students = []
        for tribe in tribes or []:
            students_csv = source / tribe["students"]
            students_df = pd.read_csv(students_csv)
            # NOTE: the origin is stored as a Path here, whereas get_exams
            # stores str(csv).
            students_df["origin"] = students_csv
            students_df["tribe"] = tribe["name"]
            students.append(students_df)

        if not students:
            # pd.concat refuses an empty list; the csv columns are unknown
            # without a file, so only the columns added here are declared.
            return pd.DataFrame(columns=["origin", "tribe"])
        return pd.concat(students)

    def get_student_scores(self, student=None):
        """Get all scores for students

        Not implemented yet: always returns ``None``.
        """
        pass