204 lines
8.3 KiB
Python
204 lines
8.3 KiB
Python
#!/usr/bin/env python
|
|
# encoding: utf-8
|
|
|
|
import yaml
|
|
import os
|
|
import uuid
|
|
from pathlib import Path
|
|
import pandas as pd
|
|
from .. import Loader
|
|
from .lib import *
|
|
|
|
|
|
# Path of the default configuration file located next to this module.
DEFAULT_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_config.yml")

# Decode explicitly as UTF-8 instead of relying on the platform's locale
# encoding: the default configuration contains accented names, which would be
# mis-decoded on systems whose default encoding is not UTF-8.
with open(DEFAULT_CONFIG_FILE, "r", encoding="utf-8") as config:
    # Parsed once at import time; the result is shared by the whole module.
    DEFAULT_CONFIG = yaml.load(config, Loader=yaml.FullLoader)
|
|
|
|
|
|
def maybe_dataframe(datas):
    """Turn a dataframe-like object into a list of its rows.

    :param datas: object that may expose an ``iterrows`` method
    :return: list made of the second item of each pair yielded by
        ``datas.iterrows()``; when an AttributeError is raised (typically
        because ``datas`` has no ``iterrows``), ``datas`` is returned untouched
    """
    try:
        rows = []
        for pair in datas.iterrows():
            # iterrows yields (index, row) pairs; only the row is kept
            rows.append(pair[1])
    except AttributeError:
        return datas
    return rows
|
|
|
|
|
|
class CSVLoader(Loader):

    """Loader when scores and metadatas are stored in csv files

    :config:

    :example:
    >>> loader = CSVLoader()
    >>> loader.get_config()
    {'source': './', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}}
    >>> loader = CSVLoader("./test_config.yml")
    >>> loader.get_config()
    {'source': './example', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'output': './output', 'templates': 'templates/', 'tribes': {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}}

    """

    # Bound to the module-level DEFAULT_CONFIG dict (same object, not a copy).
    CONFIG = DEFAULT_CONFIG

    def get_config(self):
        """Get config

        :return: the object stored in ``self._config`` (presumably set by the
            ``Loader`` base class -- not visible from this module)
        """
        return self._config

    def rename_columns(self, dataframe):
        """Rename dataframe columns with the `csv_fields` mapping

        :param dataframe: dataframe whose columns are renamed
        :return: result of ``dataframe.rename`` using ``csv_fields`` as mapping
        """
        return dataframe.rename(columns=self._config["csv_fields"])

    def reverse_csv_field(self, keys):
        """Reverse csv field from keys

        :param keys: iterable of `csv_fields` keys (e.g. "exam", "date")
        :return: list of the matching `csv_fields` values, in the same order
        :raises KeyError: when a key is not declared in `csv_fields`
        """
        return [self._config["csv_fields"][k] for k in keys]

    def get_tribes(self, only_names=False):
        """Get tribes list

        :param only_names: when true, return only the tribe names
        :return: list of the "tribes" keys when ``only_names`` is true,
            otherwise the "tribes" mapping of the configuration
        :raises KeyError: when the configuration has no "tribes" entry

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> loader.get_tribes()
        {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}
        >>> loader.get_tribes(only_names=True)
        ['Tribe1', 'Tribe2']
        """
        tribes = self._config["tribes"]
        if only_names:
            return list(tribes)
        return tribes

    def get_exams(self, tribes=None):
        """Get exams list

        :param tribes: get only exams for those tribes (None means no tribe)
        :return: dataframe of exams; an empty dataframe carrying the usual
            columns when no exam is found

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> exams.columns
        Index(['Nom', 'Date', 'Trimestre', 'origin', 'tribe', 'id'], dtype='object')
        >>> exams.loc[:, exams.columns!="id"]
        Nom Date Trimestre origin tribe
        0 DS 12/01/2021 1 example/Tribe1/210112_DS.csv Tribe1
        0 DS6 22/01/2021 1 example/Tribe1/210122_DS6.csv Tribe1
        """
        if tribes is None:
            tribes = []

        # The extracted fields do not depend on the tribe nor on the csv file.
        fields = self.reverse_csv_field(["exam", "date", "term"])

        exams = []
        for tribe in tribes:
            tribe_path = Path(self._config["source"]) / tribe
            for csv in list_csvs(tribe_path):
                exam = self.rename_columns(extract_fields(csv, fields))
                exam["origin"] = csv
                exam["tribe"] = tribe
                exam["id"] = uuid.uuid4()
                exams.append(exam)

        # pd.concat raises ValueError on an empty list: answer with an empty
        # dataframe instead so that "no tribe" / "no csv" is not an error.
        if not exams:
            return pd.DataFrame(columns=fields + ["origin", "tribe", "id"])
        return pd.concat(exams)

    def get_students(self, tribes=None):
        """Get student list

        Placeholder: the method currently returns an empty string whatever
        the argument is.

        :param tribes: concerned tribes
        """
        return ""

    def get_exam_questions(self, exams=None):
        """Get questions for exams stored in score_files

        :param exams: list or dataframe of exams metadatas (need origin field to find the csv)
        :return: dataframe of questions; an empty dataframe carrying the
            usual columns when no exam is given

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> loader.get_exam_questions([exams.iloc[0]]).columns
        Index(['Exercice', 'Question', 'Competence', 'Domaine', 'Commentaire',
               'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'],
              dtype='object')
        >>> questions = loader.get_exam_questions(exams)
        >>> questions.columns
        Index(['Exercice', 'Question', 'Competence', 'Domaine', 'Commentaire',
               'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'],
              dtype='object')
        """
        if exams is None:
            exams = []

        # Same fields for every exam: computed once, outside the loop.
        fields = self.reverse_csv_field(
            [
                "exercise",
                "question",
                "competence",
                "theme",
                "comment",
                "score_rate",
                "is_leveled",
            ]
        )

        questions = []
        # A dataframe is walked row by row, a list is used as it is.
        for exam in maybe_dataframe(exams):
            question = self.rename_columns(extract_fields(exam["origin"], fields))
            question["origin"] = exam["origin"]
            question["exam_id"] = exam["id"]
            # NOTE(review): a single uuid is assigned to the whole frame, so
            # every question of one exam appears to share the same id --
            # confirm this is intended.
            question["id"] = uuid.uuid4()
            questions.append(question)

        # pd.concat raises ValueError on an empty list.
        if not questions:
            return pd.DataFrame(columns=fields + ["origin", "exam_id", "id"])
        return pd.concat(questions)

    def get_questions_scores(self, questions=None):
        """Get scores of those questions

        :param questions: list or dataframe of questions metadatas (need origin field to find the csv)
        :return: dataframe with one row per (question, student) pair; an
            empty dataframe carrying the usual columns when there is no
            question

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> questions = loader.get_exam_questions(exams)
        >>> scores = loader.get_questions_scores(questions)
        >>> scores.columns
        Index(['origin', 'question_id', 'student_name', 'score'], dtype='object')
        >>> scores["student_name"].unique()
        array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle',
               'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves',
               'Erick Going', 'Ase Praton', 'Rollins Planks', 'Dunstan Sarjant',
               'Stacy Guiton', 'Ange Stanes', 'Amabelle Elleton',
               'Darn Broomhall', 'Dyan Chatto', 'Keane Rennebach', 'Nari Paulton',
               'Brandy Wase', 'Jaclyn Firidolfi', 'Violette Lockney'],
              dtype=object)
        """
        if questions is None:
            questions = []
        if not isinstance(questions, pd.DataFrame):
            # A list of rows is accepted too: rebuild a dataframe from it so
            # that groupby is available, and let pandas recover the dtypes.
            questions = pd.DataFrame(list(questions)).infer_objects()

        empty_scores = pd.DataFrame(
            columns=["origin", "question_id", "student_name", "score"]
        )
        if questions.empty:
            return empty_scores

        # Columns of the csv files that describe questions (not students).
        csv_columns = list(self._config["csv_fields"].values())
        known_columns = set(csv_columns)

        scores = []
        for origin, questions_df in questions.groupby("origin"):
            scores_df = pd.read_csv(origin)
            # Every column which is not a declared csv field is a student.
            student_names = [c for c in scores_df.columns if c not in known_columns]

            # Attach the question metadatas (ids, origin) to the csv rows.
            common_columns = [c for c in questions_df.columns if c in scores_df.columns]
            scores_df = pd.merge(scores_df, questions_df, on=common_columns)
            scores_df = scores_df.drop(columns=csv_columns + ["exam_id"])

            # Wide (one column per student) to long (one row per student).
            kept_columns = [c for c in scores_df if c not in student_names]
            scores_df = pd.melt(
                scores_df,
                id_vars=kept_columns,
                value_vars=student_names,
                var_name="student_name",
                value_name="score",
            ).rename(columns={"id": "question_id"})

            scores.append(scores_df)

        # pd.concat raises ValueError on an empty list.
        if not scores:
            return empty_scores
        return pd.concat(scores)

    def get_student_scores(self, student):
        """Get scores of the student

        Placeholder: the method currently returns an empty string whatever
        the argument is.

        :param student:
        """
        return ""
|