Feat: rename store to database and fix exam and question ids

This commit is contained in:
2021-04-12 11:42:24 +02:00
parent a292fe23e0
commit 6331573940
6 changed files with 74 additions and 31 deletions

View File

@@ -0,0 +1,88 @@
#!/usr/bin/env python
# encoding: utf-8
from abc import ABC, abstractmethod
import yaml
"""
Adapter to pull data from the filesystem
# Loader
# Writer
"""
class Loader(ABC):
"""Load data from source"""
CONFIG = {}
def __init__(self, configfile=""):
"""Init loader
:param configfile: yaml file with informations on data source
"""
self._config = self.CONFIG
if configfile.endswith(".yml"):
with open(configfile, "r") as config:
self._config.update(yaml.load(config, Loader=yaml.FullLoader))
def get_config(self):
""" Get config"""
return self._config
@abstractmethod
def get_tribes(self):
""" Get tribes list """
pass
@abstractmethod
def get_exams(self, tribes=[]):
"""Get exams list
:param tribes: get only exams for those tribes
"""
pass
@abstractmethod
def get_students(self, tribes=[]):
"""Get student list
:param filters: list of filters
"""
pass
@abstractmethod
def get_exam_questions(self, exams=[]):
"""Get questions for the exam
:param exams: questions for those exams only
"""
pass
@abstractmethod
def get_questions_scores(self, questions=[]):
"""Get scores of those questions
:param questions: score for those questions
"""
pass
# @abstractmethod
# def get_student_scores(self, student):
# """Get scores of the student
# :param student:
# """
# pass
class Writer(ABC):
""" Write datas to the source """
def __init__(self):
pass

View File

@@ -0,0 +1,15 @@
#!/usr/bin/env python
# encoding: utf-8
"""
Store data using filesystem for organisation, csv for scores
## Organisation
- tribe1.csv # list of students for the tribe
- tribe1/
- exam1.csv # questions and scores for exam1
- exam1.yml # Extra information about exam1
- exam2.csv # questions and scores for exam2
"""

View File

@@ -0,0 +1,47 @@
---
source: ./ # basepath where to start
competences: # Competences
Chercher:
name: Chercher
abrv: Cher
Représenter:
name: Représenter
abrv: Rep
Modéliser:
name: Modéliser
abrv: Mod
Raisonner:
name: Raisonner
abrv: Rai
Calculer:
name: Calculer
abrv: Cal
Communiquer:
name: Communiquer
abrv: Com
valid_scores: #
BAD: 0 # Everything is bad
FEW: 1 # Few good things
NEARLY: 2 # Nearly good but things are missing
GOOD: 3 # Everything is good
NOTFILLED: # The item is not scored yet
NOANSWER: . # Student gives no answer (count as 0)
ABS: "a" # Student has absent (this score won't be impact the final mark)
csv_fields: # dataframe_field: csv_field
term: Trimestre
exam: Nom
date: Date
exercise: Exercice
question: Question
competence: Competence
theme: Domaine
comment: Commentaire
score_rate: Bareme
is_leveled: Est_nivele
id_templates:
exam: "{name}_{tribe}"
question: "{exam_id}_{exercise}_{question}_{comment}"

View File

@@ -0,0 +1,52 @@
#!/usr/bin/env python
# encoding: utf-8
import pandas as pd
from pathlib import Path
from unidecode import unidecode
__all__ = ["list_csvs", "extract_fields"]
def list_csvs(path):
"""list csv files in path
:example:
>>> list_csvs("./example/Tribe1/")
[PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
>>> list_csvs("./example/Tribe1")
[PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
"""
return list(Path(path).glob("*.csv"))
def extract_fields(csv_filename, fields=[], remove_duplicates=True):
"""Extract fields in csv
:param csv_filename: csv filename (with header)
:param fields: list of fields to extract (all fields if empty list - default)
:param remove_duplicates: keep uniques rows (default True)
:example:
>>> extract_fields("./example/Tribe1/210122_DS6.csv", ["Trimestre", "Nom", "Date"])
Trimestre Nom Date
0 1 DS6 22/01/2021
"""
df = pd.read_csv(csv_filename)
if fields:
df = df[fields]
if remove_duplicates:
return df.drop_duplicates()
return df
def build_id(template, element):
"""Build an id from template to the element
:example:
>>> element = {"name": "pléà", "place": "here", "foo":"bar"}
>>> build_id("{name} {place}", element)
'plea_here'
"""
return unidecode(template.format(**element)).replace(" ", "_")

View File

@@ -0,0 +1,239 @@
#!/usr/bin/env python
# encoding: utf-8
import yaml
import os
import uuid
from pathlib import Path
import pandas as pd
from .. import Loader
from .lib import list_csvs, extract_fields, build_id
DEFAULT_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_config.yml")
with open(DEFAULT_CONFIG_FILE, "r") as config:
DEFAULT_CONFIG = yaml.load(config, Loader=yaml.FullLoader)
def maybe_dataframe(datas):
try:
return [e[1] for e in datas.iterrows()]
except AttributeError:
return datas
class CSVLoader(Loader):
"""Loader when scores and metadatas are stored in csv files
:config:
:example:
>>> loader = CSVLoader()
>>> loader.get_config()
{'source': './', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'id_templates': {'exam': '{name}_{tribe}', 'question': '{exam_id}_{exercise}_{question}_{comment}'}}
>>> loader = CSVLoader("./test_config.yml")
>>> loader.get_config()
{'source': './example', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'id_templates': {'exam': '{name}_{tribe}', 'question': '{exam_id}_{exercise}_{question}_{comment}'}, 'output': './output', 'templates': 'templates/', 'tribes': {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}}
"""
CONFIG = DEFAULT_CONFIG
def get_config(self):
""" Get config """
return self._config
def rename_columns(self, dataframe):
"""Rename dataframe column to match with `csv_fields`
:param dataframe: the dataframe
:example:
>>> loader = CSVLoader()
>>>
"""
return dataframe.rename(
columns={v: k for k, v in self._config["csv_fields"].items()}
)
def reverse_csv_field(self, keys):
""" Reverse csv field from keys """
return [self._config["csv_fields"][k] for k in keys]
def get_tribes(self, only_names=False):
"""Get tribes list
:example:
>>> loader = CSVLoader("./test_config.yml")
>>> loader.get_tribes()
{'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}
>>> loader.get_tribes(only_names=True)
['Tribe1', 'Tribe2']
"""
if only_names:
return list(self._config["tribes"].keys())
return self._config["tribes"]
def get_exams(self, tribes=[]):
"""Get exams list
:param tribes: get only exams for those tribes
:return: dataframe of exams
:example:
>>> loader = CSVLoader("./test_config.yml")
>>> exams = loader.get_exams(["Tribe1"])
>>> exams.columns
Index(['name', 'date', 'term', 'origin', 'tribe', 'id'], dtype='object')
>>> exams
name date term origin tribe id
0 DS 12/01/2021 1 example/Tribe1/210112_DS.csv Tribe1 DS_Tribe1
0 DS6 22/01/2021 1 example/Tribe1/210122_DS6.csv Tribe1 DS6_Tribe1
"""
exams = []
for tribe in tribes:
tribe_path = Path(self._config["source"]) / tribe
csvs = list_csvs(tribe_path)
for csv in csvs:
fields = self.reverse_csv_field(["exam", "date", "term"])
exam = extract_fields(csv, fields)
exam = self.rename_columns(exam)
exam = exam.rename(columns={"exam": "name"})
exam["origin"] = csv
exam["tribe"] = tribe
exam["id"] = build_id(
self._config["id_templates"]["exam"], exam.iloc[0]
)
exams.append(exam)
return pd.concat(exams)
def get_exam_questions(self, exams=[]):
"""Get questions for exams stored in score_files
:param exams: list or dataframe of exams metadatas (need origin field to find the csv)
:example:
>>> loader = CSVLoader("./test_config.yml")
>>> exams = loader.get_exams(["Tribe1"])
>>> loader.get_exam_questions([exams.iloc[0]]).columns
Index(['exercise', 'question', 'competence', 'theme', 'comment', 'score_rate',
'is_leveled', 'origin', 'exam_id', 'id'],
dtype='object')
>>> questions = loader.get_exam_questions(exams)
>>> questions.iloc[0]
exercise Exercice 1
question 1
competence Calculer
theme Plop
comment Coucou
score_rate 1.0
is_leveled 1.0
origin example/Tribe1/210112_DS.csv
exam_id DS_Tribe1
id DS_Tribe1_Exercice_1_1_Coucou
Name: 0, dtype: object
"""
_exams = maybe_dataframe(exams)
questions = []
for exam in _exams:
fields = self.reverse_csv_field(
[
"exercise",
"question",
"competence",
"theme",
"comment",
"score_rate",
"is_leveled",
]
)
question = extract_fields(exam["origin"], fields)
question = self.rename_columns(question)
question["origin"] = exam["origin"]
question["exam_id"] = exam["id"]
question["id"] = build_id(
self._config["id_templates"]["question"], question.iloc[0]
)
questions.append(question)
return pd.concat(questions)
def get_questions_scores(self, questions=[]):
"""Get scores of those questions
:param questions: list or dataframe of questions metadatas (need origin field to find the csv)
:example:
>>> loader = CSVLoader("./test_config.yml")
>>> exams = loader.get_exams(["Tribe1"])
>>> questions = loader.get_exam_questions(exams)
>>> scores = loader.get_questions_scores(questions)
>>> scores.columns
Index(['term', 'exam', 'date', 'exercise', 'question', 'competence', 'theme',
'comment', 'score_rate', 'is_leveled', 'origin', 'exam_id',
'question_id', 'student_name', 'score'],
dtype='object')
>>> scores["student_name"].unique()
array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle',
'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves',
'Erick Going', 'Ase Praton', 'Rollins Planks', 'Dunstan Sarjant',
'Stacy Guiton', 'Ange Stanes', 'Amabelle Elleton',
'Darn Broomhall', 'Dyan Chatto', 'Keane Rennebach', 'Nari Paulton',
'Brandy Wase', 'Jaclyn Firidolfi', 'Violette Lockney'],
dtype=object)
"""
scores = []
group_questions = questions.groupby("origin")
for origin, questions_df in group_questions:
scores_df = pd.read_csv(origin)
scores_df = self.rename_columns(scores_df)
student_names = [
c
for c in scores_df.columns
if c not in self._config["csv_fields"].keys()
]
common_columns = [c for c in questions_df.columns if c in scores_df.columns]
scores_df = pd.merge(scores_df, questions_df, on=common_columns)
kept_columns = [c for c in scores_df if c not in student_names]
scores_df = pd.melt(
scores_df,
id_vars=kept_columns,
value_vars=student_names,
var_name="student_name",
value_name="score",
)
scores_df = scores_df.rename(columns={"id": "question_id"})
scores.append(scores_df)
return pd.concat(scores)
def get_students(self, tribes=[]):
"""Get student list
:param tribes: concerned tribes
:example:
>>> loader = CSVLoader("./test_config.yml")
>>> tribes = loader.get_tribes()
>>> students = loader.get_students([tribes["Tribe1"]])
>>> students.columns
Index(['Nom', 'email', 'origin', 'tribe'], dtype='object')
"""
students = []
for tribe in tribes:
students_csv = Path(self._config["source"]) / tribe["students"]
students_df = pd.read_csv(students_csv)
students_df["origin"] = students_csv
students_df["tribe"] = tribe["name"]
students.append(students_df)
return pd.concat(students)
def get_student_scores(self, student=[]):
"""Get all scores for students"""
pass

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env python
# encoding: utf-8
"""
"""