Feat: rename store to database and fix exam and question ids
This commit is contained in:
88
recopytex/database/__init__.py
Normal file
88
recopytex/database/__init__.py
Normal file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
import yaml
|
||||
|
||||
"""
|
||||
|
||||
Adapter to pull data from the filesystem
|
||||
|
||||
# Loader
|
||||
|
||||
# Writer
|
||||
"""
|
||||
|
||||
|
||||
class Loader(ABC):

    """Load data from a source.

    Abstract base class: concrete loaders implement the ``get_*`` methods
    declared below.

    ``CONFIG`` holds the default configuration. Each instance works on its
    own deep copy of it, so loading a config file never leaks into the class
    attribute or into other instances.
    """

    CONFIG = {}

    def __init__(self, configfile=""):
        """Init loader

        :param configfile: yaml file with information on the data source.
            It is only read when its name ends with ``.yml`` or ``.yaml``;
            any other value (including the default ``""``) keeps ``CONFIG``.
        """
        # Function-scope import so the module's import block is untouched.
        from copy import deepcopy

        # Copy before updating: ``self.CONFIG`` is a class attribute, so
        # updating it in place would change the defaults of every other
        # instance (and of any module-level dict it aliases).
        self._config = deepcopy(self.CONFIG)
        if configfile.endswith((".yml", ".yaml")):
            # Explicit encoding: do not depend on the locale default.
            with open(configfile, "r", encoding="utf-8") as config:
                # NOTE(review): FullLoader is kept from the original code;
                # consider yaml.safe_load if config files can be untrusted.
                loaded = yaml.load(config, Loader=yaml.FullLoader)
            # An empty yaml document loads as None.
            self._config.update(loaded or {})

    def get_config(self):
        """Get config"""
        return self._config

    @abstractmethod
    def get_tribes(self):
        """Get tribes list"""

    @abstractmethod
    def get_exams(self, tribes=None):
        """Get exams list

        :param tribes: get only exams for those tribes
        """

    @abstractmethod
    def get_students(self, tribes=None):
        """Get student list

        :param tribes: get only students of those tribes
        """

    @abstractmethod
    def get_exam_questions(self, exams=None):
        """Get questions for the exam

        :param exams: questions for those exams only
        """

    @abstractmethod
    def get_questions_scores(self, questions=None):
        """Get scores of those questions

        :param questions: score for those questions
        """
|
||||
|
||||
# @abstractmethod
|
||||
# def get_student_scores(self, student):
|
||||
# """Get scores of the student
|
||||
|
||||
# :param student:
|
||||
# """
|
||||
# pass
|
||||
|
||||
|
||||
class Writer(ABC):

    """Write data to the source.

    Placeholder base class: it declares no abstract method yet.
    """

    def __init__(self):
        """Nothing to initialise for now."""
        return None
|
||||
15
recopytex/database/filesystem/__init__.py
Normal file
15
recopytex/database/filesystem/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
"""
|
||||
Store data using filesystem for organisation, csv for scores
|
||||
|
||||
## Organisation
|
||||
|
||||
- tribe1.csv # list of students for the tribe
|
||||
- tribe1/
|
||||
- exam1.csv # questions and scores for exam1
|
||||
- exam1.yml # Extra information about exam1
|
||||
- exam2.csv # questions and scores for exam2
|
||||
"""
|
||||
|
||||
47
recopytex/database/filesystem/default_config.yml
Normal file
47
recopytex/database/filesystem/default_config.yml
Normal file
@@ -0,0 +1,47 @@
|
||||
---
|
||||
source: ./ # basepath where to start
|
||||
|
||||
competences: # Competences
|
||||
Chercher:
|
||||
name: Chercher
|
||||
abrv: Cher
|
||||
Représenter:
|
||||
name: Représenter
|
||||
abrv: Rep
|
||||
Modéliser:
|
||||
name: Modéliser
|
||||
abrv: Mod
|
||||
Raisonner:
|
||||
name: Raisonner
|
||||
abrv: Rai
|
||||
Calculer:
|
||||
name: Calculer
|
||||
abrv: Cal
|
||||
Communiquer:
|
||||
name: Communiquer
|
||||
abrv: Com
|
||||
|
||||
valid_scores: #
|
||||
BAD: 0 # Everything is bad
|
||||
FEW: 1 # Few good things
|
||||
NEARLY: 2 # Nearly good but things are missing
|
||||
GOOD: 3 # Everything is good
|
||||
NOTFILLED: # The item is not scored yet
|
||||
NOANSWER: . # Student gives no answer (count as 0)
|
||||
ABS: "a" # Student was absent (this score won't impact the final mark)
|
||||
|
||||
csv_fields: # dataframe_field: csv_field
|
||||
term: Trimestre
|
||||
exam: Nom
|
||||
date: Date
|
||||
exercise: Exercice
|
||||
question: Question
|
||||
competence: Competence
|
||||
theme: Domaine
|
||||
comment: Commentaire
|
||||
score_rate: Bareme
|
||||
is_leveled: Est_nivele
|
||||
|
||||
id_templates:
|
||||
exam: "{name}_{tribe}"
|
||||
question: "{exam_id}_{exercise}_{question}_{comment}"
|
||||
52
recopytex/database/filesystem/lib.py
Normal file
52
recopytex/database/filesystem/lib.py
Normal file
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from unidecode import unidecode
|
||||
|
||||
|
||||
# Names exported by a wildcard import of this module. ``build_id`` (defined
# below) is not listed here, so it has to be imported by name.
__all__ = ["list_csvs", "extract_fields"]
|
||||
|
||||
|
||||
def list_csvs(path):
    """List csv files in path, sorted by path.

    ``Path.glob`` yields entries in a filesystem-dependent order; sorting
    makes the result (and everything derived from it) deterministic.

    :param path: directory to look into (not recursive)
    :return: sorted list of ``Path`` objects matching ``*.csv``

    :example:
    >>> list_csvs("./example/Tribe1/")
    [PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
    >>> list_csvs("./example/Tribe1")
    [PosixPath('example/Tribe1/210112_DS.csv'), PosixPath('example/Tribe1/210122_DS6.csv')]
    """
    return sorted(Path(path).glob("*.csv"))
|
||||
|
||||
|
||||
def extract_fields(csv_filename, fields=None, remove_duplicates=True):
    """Extract fields in csv

    :param csv_filename: csv filename (with header)
    :param fields: fields to extract, as a list or tuple of column names
        (all fields when empty or None - default)
    :param remove_duplicates: keep unique rows only (default True)
    :return: dataframe restricted to the requested fields

    :example:
    >>> extract_fields("./example/Tribe1/210122_DS6.csv", ["Trimestre", "Nom", "Date"])
    Trimestre Nom Date
    0 1 DS6 22/01/2021
    """
    df = pd.read_csv(csv_filename)
    if fields:
        # Force a list: pandas reads a tuple as one single column key.
        df = df[list(fields)]
    if remove_duplicates:
        return df.drop_duplicates()
    return df
|
||||
|
||||
|
||||
def build_id(template, element):
    """Build an id for the element by filling in the template

    The template is formatted with the element's items as keyword
    arguments, the result goes through ``unidecode`` and spaces are
    replaced with underscores.

    :param template: format string with named placeholders
    :param element: mapping giving a value for each placeholder
    :return: the id as a string

    :example:
    >>> element = {"name": "pléà", "place": "here", "foo":"bar"}
    >>> build_id("{name} {place}", element)
    'plea_here'
    """
    filled = template.format(**element)
    transliterated = unidecode(filled)
    return transliterated.replace(" ", "_")
|
||||
239
recopytex/database/filesystem/loader.py
Normal file
239
recopytex/database/filesystem/loader.py
Normal file
@@ -0,0 +1,239 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
import yaml
|
||||
import os
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
from .. import Loader
|
||||
from .lib import list_csvs, extract_fields, build_id
|
||||
|
||||
|
||||
# Default configuration file; it is located next to this module.
DEFAULT_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_config.yml")
# Explicit UTF-8: the default config contains accented names (e.g.
# "Représenter"), which the locale's default encoding may not decode properly.
with open(DEFAULT_CONFIG_FILE, "r", encoding="utf-8") as config:
    DEFAULT_CONFIG = yaml.load(config, Loader=yaml.FullLoader)
|
||||
|
||||
|
||||
def maybe_dataframe(datas):
    """Return the rows of a dataframe as a list, or the argument itself

    :param datas: a dataframe, or any other object
    :return: the list of rows given by ``datas.iterrows()`` (index dropped);
        when that fails with AttributeError, ``datas`` unchanged
    """
    try:
        rows = [row for _index, row in datas.iterrows()]
    except AttributeError:
        rows = datas
    return rows
|
||||
|
||||
|
||||
class CSVLoader(Loader):

    """Loader when scores and metadatas are stored in csv files

    :config:

    :example:
    >>> loader = CSVLoader()
    >>> loader.get_config()
    {'source': './', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'id_templates': {'exam': '{name}_{tribe}', 'question': '{exam_id}_{exercise}_{question}_{comment}'}}
    >>> loader = CSVLoader("./test_config.yml")
    >>> loader.get_config()
    {'source': './example', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'id_templates': {'exam': '{name}_{tribe}', 'question': '{exam_id}_{exercise}_{question}_{comment}'}, 'output': './output', 'templates': 'templates/', 'tribes': {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}}
    """

    CONFIG = DEFAULT_CONFIG

    # Dataframe fields describing an exam / a question (keys of `csv_fields`).
    _EXAM_FIELDS = ["exam", "date", "term"]
    _QUESTION_FIELDS = [
        "exercise",
        "question",
        "competence",
        "theme",
        "comment",
        "score_rate",
        "is_leveled",
    ]

    def get_config(self):
        """Get config"""
        return self._config

    def rename_columns(self, dataframe):
        """Rename dataframe column to match with `csv_fields`

        Csv headers (values of `csv_fields`) are replaced by the matching
        dataframe field names (keys of `csv_fields`); other columns are
        left as they are.

        :param dataframe: the dataframe
        :return: a renamed copy of the dataframe
        """
        return dataframe.rename(
            columns={v: k for k, v in self._config["csv_fields"].items()}
        )

    def reverse_csv_field(self, keys):
        """Reverse csv field from keys

        :param keys: dataframe field names (keys of `csv_fields`)
        :return: list of the matching csv headers, in the same order
        """
        return [self._config["csv_fields"][k] for k in keys]

    def _build_ids(self, kind, dataframe):
        """Build one id per row of the dataframe from the `kind` id template"""
        template = self._config["id_templates"][kind]
        return [build_id(template, row) for _, row in dataframe.iterrows()]

    def get_tribes(self, only_names=False):
        """Get tribes list

        :param only_names: return the list of tribe names instead of the
            full description of the tribes
        :return: dict of tribes (empty when the config declares no tribe),
            or list of their names

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> loader.get_tribes()
        {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}
        >>> loader.get_tribes(only_names=True)
        ['Tribe1', 'Tribe2']
        """
        # The default config has no "tribes" section.
        tribes = self._config.get("tribes", {})
        if only_names:
            return list(tribes)
        return tribes

    def get_exams(self, tribes=None):
        """Get exams list

        :param tribes: get only exams for those tribes (tribe names)
        :return: dataframe of exams (empty dataframe when nothing is found)

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> exams.columns
        Index(['name', 'date', 'term', 'origin', 'tribe', 'id'], dtype='object')
        >>> exams
        name date term origin tribe id
        0 DS 12/01/2021 1 example/Tribe1/210112_DS.csv Tribe1 DS_Tribe1
        0 DS6 22/01/2021 1 example/Tribe1/210122_DS6.csv Tribe1 DS6_Tribe1
        """
        fields = self.reverse_csv_field(self._EXAM_FIELDS)
        exams = []
        for tribe in tribes or []:
            tribe_path = Path(self._config["source"]) / tribe
            for csv_path in list_csvs(tribe_path):
                exam = extract_fields(csv_path, fields)
                exam = self.rename_columns(exam)
                exam = exam.rename(columns={"exam": "name"})
                exam["origin"] = csv_path
                exam["tribe"] = tribe
                exam["id"] = self._build_ids("exam", exam)
                exams.append(exam)

        # pd.concat raises ValueError on an empty list.
        if not exams:
            return pd.DataFrame()
        return pd.concat(exams)

    def get_exam_questions(self, exams=None):
        """Get questions for exams stored in score_files

        Each question gets its own id, built from its own row with the
        `question` id template.

        :param exams: list or dataframe of exams metadatas (need origin field to find the csv)
        :return: dataframe of questions (empty dataframe when there is no exam)

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> loader.get_exam_questions([exams.iloc[0]]).columns
        Index(['exercise', 'question', 'competence', 'theme', 'comment', 'score_rate',
        'is_leveled', 'origin', 'exam_id', 'id'],
        dtype='object')
        >>> questions = loader.get_exam_questions(exams)
        >>> questions.iloc[0]
        exercise Exercice 1
        question 1
        competence Calculer
        theme Plop
        comment Coucou
        score_rate 1.0
        is_leveled 1.0
        origin example/Tribe1/210112_DS.csv
        exam_id DS_Tribe1
        id DS_Tribe1_Exercice_1_1_Coucou
        Name: 0, dtype: object
        """
        _exams = maybe_dataframe(exams if exams is not None else [])
        fields = self.reverse_csv_field(self._QUESTION_FIELDS)

        questions = []
        for exam in _exams:
            question = extract_fields(exam["origin"], fields)
            question = self.rename_columns(question)
            question["origin"] = exam["origin"]
            question["exam_id"] = exam["id"]
            # One id per row: building it from the first row only would give
            # every question of the exam the same id.
            question["id"] = self._build_ids("question", question)
            questions.append(question)

        if not questions:
            return pd.DataFrame()
        return pd.concat(questions)

    def get_questions_scores(self, questions=None):
        """Get scores of those questions

        :param questions: list or dataframe of questions metadatas (need origin field to find the csv)
        :return: dataframe with one row per (question, student) pair
            (empty dataframe when there is no question)

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> exams = loader.get_exams(["Tribe1"])
        >>> questions = loader.get_exam_questions(exams)
        >>> scores = loader.get_questions_scores(questions)
        >>> scores.columns
        Index(['term', 'exam', 'date', 'exercise', 'question', 'competence', 'theme',
        'comment', 'score_rate', 'is_leveled', 'origin', 'exam_id',
        'question_id', 'student_name', 'score'],
        dtype='object')
        >>> scores["student_name"].unique()
        array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle',
        'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves',
        'Erick Going', 'Ase Praton', 'Rollins Planks', 'Dunstan Sarjant',
        'Stacy Guiton', 'Ange Stanes', 'Amabelle Elleton',
        'Darn Broomhall', 'Dyan Chatto', 'Keane Rennebach', 'Nari Paulton',
        'Brandy Wase', 'Jaclyn Firidolfi', 'Violette Lockney'],
        dtype=object)
        """
        if questions is None:
            return pd.DataFrame()
        if not isinstance(questions, pd.DataFrame):
            # A list of rows (Series or dicts) has no groupby.
            questions = pd.DataFrame(list(questions))
        if questions.empty:
            return pd.DataFrame()

        known_fields = set(self._config["csv_fields"])
        scores = []
        for origin, questions_df in questions.groupby("origin"):
            scores_df = self.rename_columns(pd.read_csv(origin))
            # Every column which is not a configured field is a student.
            student_names = [c for c in scores_df.columns if c not in known_fields]

            common_columns = [c for c in questions_df.columns if c in scores_df.columns]
            scores_df = pd.merge(scores_df, questions_df, on=common_columns)

            students = set(student_names)
            kept_columns = [c for c in scores_df.columns if c not in students]
            # Wide to long: one row per (question, student).
            scores_df = pd.melt(
                scores_df,
                id_vars=kept_columns,
                value_vars=student_names,
                var_name="student_name",
                value_name="score",
            )

            scores_df = scores_df.rename(columns={"id": "question_id"})
            scores.append(scores_df)

        return pd.concat(scores)

    def get_students(self, tribes=None):
        """Get student list

        :param tribes: concerned tribes (tribe descriptions with `name`
            and `students` keys, as returned by `get_tribes`)
        :return: dataframe of students (empty dataframe when there is no tribe)

        :example:
        >>> loader = CSVLoader("./test_config.yml")
        >>> tribes = loader.get_tribes()
        >>> students = loader.get_students([tribes["Tribe1"]])
        >>> students.columns
        Index(['Nom', 'email', 'origin', 'tribe'], dtype='object')
        """
        students = []
        for tribe in tribes or []:
            students_csv = Path(self._config["source"]) / tribe["students"]
            students_df = pd.read_csv(students_csv)
            students_df["origin"] = students_csv
            students_df["tribe"] = tribe["name"]
            students.append(students_df)

        if not students:
            return pd.DataFrame()
        return pd.concat(students)

    def get_student_scores(self, student=None):
        """Get all scores for students (not implemented yet, returns None)"""
        return None
|
||||
7
recopytex/database/filesystem/writer.py
Normal file
7
recopytex/database/filesystem/writer.py
Normal file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding: utf-8
|
||||
|
||||
"""
|
||||
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user