recopytex/recopytex/store/filesystem/loader.py

204 lines
8.3 KiB
Python

#!/usr/bin/env python
# encoding: utf-8
import yaml
import os
import uuid
from pathlib import Path
import pandas as pd
from .. import Loader
from .lib import *
DEFAULT_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_config.yml")
with open(DEFAULT_CONFIG_FILE, "r") as config:
DEFAULT_CONFIG = yaml.load(config, Loader=yaml.FullLoader)
def maybe_dataframe(datas):
try:
return [e[1] for e in datas.iterrows()]
except AttributeError:
return datas
class CSVLoader(Loader):
"""Loader when scores and metadatas are stored in csv files
:config:
:example:
>>> loader = CSVLoader()
>>> loader.get_config()
{'source': './', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}}
>>> loader = CSVLoader("./test_config.yml")
>>> loader.get_config()
{'source': './example', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'output': './output', 'templates': 'templates/', 'tribes': {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}}
"""
CONFIG = DEFAULT_CONFIG
def get_config(self):
""" Get config """
return self._config
def rename_columns(self, dataframe):
"""Rename dataframe column to match with `csv_fields` """
return dataframe.rename(columns=self._config["csv_fields"])
def reverse_csv_field(self, keys):
""" Reverse csv field from keys """
return [self._config["csv_fields"][k] for k in keys]
def get_tribes(self, only_names=False):
"""Get tribes list
:example:
>>> loader = CSVLoader("./test_config.yml")
>>> loader.get_tribes()
{'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}
>>> loader.get_tribes(only_names=True)
['Tribe1', 'Tribe2']
"""
if only_names:
return list(self._config["tribes"].keys())
return self._config["tribes"]
def get_exams(self, tribes=[]):
"""Get exams list
:param tribes: get only exams for those tribes
:return: dataframe of exams
:example:
>>> loader = CSVLoader("./test_config.yml")
>>> exams = loader.get_exams(["Tribe1"])
>>> exams.columns
Index(['Nom', 'Date', 'Trimestre', 'origin', 'tribe', 'id'], dtype='object')
>>> exams.loc[:, exams.columns!="id"]
Nom Date Trimestre origin tribe
0 DS 12/01/2021 1 example/Tribe1/210112_DS.csv Tribe1
0 DS6 22/01/2021 1 example/Tribe1/210122_DS6.csv Tribe1
"""
exams = []
for tribe in tribes:
tribe_path = Path(self._config["source"]) / tribe
csvs = list_csvs(tribe_path)
for csv in csvs:
fields = self.reverse_csv_field(["exam", "date", "term"])
exam = extract_fields(csv, fields)
exam = self.rename_columns(exam)
exam["origin"] = csv
exam["tribe"] = tribe
exam["id"] = uuid.uuid4()
exams.append(exam)
return pd.concat(exams)
def get_students(self, tribes=[]):
"""Get student list
:param tribes: concerned tribes
"""
return ""
def get_exam_questions(self, exams=[]):
"""Get questions for exams stored in score_files
:param exams: list or dataframe of exams metadatas (need origin field to find the csv)
:example:
>>> loader = CSVLoader("./test_config.yml")
>>> exams = loader.get_exams(["Tribe1"])
>>> loader.get_exam_questions([exams.iloc[0]]).columns
Index(['Exercice', 'Question', 'Competence', 'Domaine', 'Commentaire',
'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'],
dtype='object')
>>> questions = loader.get_exam_questions(exams)
>>> questions.columns
Index(['Exercice', 'Question', 'Competence', 'Domaine', 'Commentaire',
'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'],
dtype='object')
"""
_exams = maybe_dataframe(exams)
questions = []
for exam in _exams:
fields = self.reverse_csv_field(
[
"exercise",
"question",
"competence",
"theme",
"comment",
"score_rate",
"is_leveled",
]
)
question = extract_fields(exam["origin"], fields)
question = self.rename_columns(question)
question["origin"] = exam["origin"]
question["exam_id"] = exam["id"]
question["id"] = uuid.uuid4()
questions.append(question)
return pd.concat(questions)
def get_questions_scores(self, questions=[]):
"""Get scores of those questions
:param questions: list or dataframe of questions metadatas (need origin field to find the csv)
:example:
>>> loader = CSVLoader("./test_config.yml")
>>> exams = loader.get_exams(["Tribe1"])
>>> questions = loader.get_exam_questions(exams)
>>> scores = loader.get_questions_scores(questions)
>>> scores.columns
Index(['origin', 'question_id', 'student_name', 'score'], dtype='object')
>>> scores["student_name"].unique()
array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle',
'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves',
'Erick Going', 'Ase Praton', 'Rollins Planks', 'Dunstan Sarjant',
'Stacy Guiton', 'Ange Stanes', 'Amabelle Elleton',
'Darn Broomhall', 'Dyan Chatto', 'Keane Rennebach', 'Nari Paulton',
'Brandy Wase', 'Jaclyn Firidolfi', 'Violette Lockney'],
dtype=object)
"""
scores = []
group_questions = questions.groupby("origin")
for origin, questions_df in group_questions:
scores_df = pd.read_csv(origin)
student_names = [
c
for c in scores_df.columns
if c not in self._config["csv_fields"].values()
]
common_columns = [c for c in questions_df.columns if c in scores_df.columns]
scores_df = pd.merge(scores_df, questions_df, on=common_columns)
scores_df = scores_df.drop(
columns=list(self._config["csv_fields"].values()) + ["exam_id"]
)
kept_columns = [c for c in scores_df if c not in student_names]
scores_df = pd.melt(
scores_df,
id_vars=kept_columns,
value_vars=student_names,
var_name="student_name",
value_name="score",
).rename(columns={"id": "question_id"})
scores.append(scores_df)
return pd.concat(scores)
def get_student_scores(self, student):
"""Get scores of the student
:param student:
"""
return ""