Feat: rename store to database and fix exam and question ids
This commit is contained in:
parent
a292fe23e0
commit
6331573940
@ -71,13 +71,13 @@ class Loader(ABC):
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
# @abstractmethod
|
||||||
def get_student_scores(self, student):
|
# def get_student_scores(self, student):
|
||||||
"""Get scores of the student
|
# """Get scores of the student
|
||||||
|
|
||||||
:param student:
|
# :param student:
|
||||||
"""
|
# """
|
||||||
pass
|
# pass
|
||||||
|
|
||||||
|
|
||||||
class Writer(ABC):
|
class Writer(ABC):
|
@ -42,3 +42,6 @@ csv_fields: # dataframe_field: csv_field
|
|||||||
score_rate: Bareme
|
score_rate: Bareme
|
||||||
is_leveled: Est_nivele
|
is_leveled: Est_nivele
|
||||||
|
|
||||||
|
id_templates:
|
||||||
|
exam: "{name}_{tribe}"
|
||||||
|
question: "{exam_id}_{exercise}_{question}_{comment}"
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from unidecode import unidecode
|
||||||
|
|
||||||
|
|
||||||
__all__ = ["list_csvs", "extract_fields"]
|
__all__ = ["list_csvs", "extract_fields"]
|
||||||
@ -38,3 +39,14 @@ def extract_fields(csv_filename, fields=[], remove_duplicates=True):
|
|||||||
if remove_duplicates:
|
if remove_duplicates:
|
||||||
return df.drop_duplicates()
|
return df.drop_duplicates()
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def build_id(template, element):
|
||||||
|
"""Build an id by filling the template with the element's fields (ASCII-folded, spaces replaced by underscores)
|
||||||
|
|
||||||
|
:example:
|
||||||
|
>>> element = {"name": "pléà", "place": "here", "foo":"bar"}
|
||||||
|
>>> build_id("{name} {place}", element)
|
||||||
|
'plea_here'
|
||||||
|
"""
|
||||||
|
return unidecode(template.format(**element)).replace(" ", "_")
|
@ -7,7 +7,7 @@ import uuid
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from .. import Loader
|
from .. import Loader
|
||||||
from .lib import *
|
from .lib import list_csvs, extract_fields, build_id
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_config.yml")
|
DEFAULT_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_config.yml")
|
||||||
@ -31,11 +31,10 @@ class CSVLoader(Loader):
|
|||||||
:example:
|
:example:
|
||||||
>>> loader = CSVLoader()
|
>>> loader = CSVLoader()
|
||||||
>>> loader.get_config()
|
>>> loader.get_config()
|
||||||
{'source': './', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}}
|
{'source': './', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'id_templates': {'exam': '{name}_{tribe}', 'question': '{exam_id}_{exercise}_{question}_{comment}'}}
|
||||||
>>> loader = CSVLoader("./test_config.yml")
|
>>> loader = CSVLoader("./test_config.yml")
|
||||||
>>> loader.get_config()
|
>>> loader.get_config()
|
||||||
{'source': './example', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'output': './output', 'templates': 'templates/', 'tribes': {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}}
|
{'source': './example', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'id_templates': {'exam': '{name}_{tribe}', 'question': '{exam_id}_{exercise}_{question}_{comment}'}, 'output': './output', 'templates': 'templates/', 'tribes': {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}}
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
CONFIG = DEFAULT_CONFIG
|
CONFIG = DEFAULT_CONFIG
|
||||||
@ -45,8 +44,18 @@ class CSVLoader(Loader):
|
|||||||
return self._config
|
return self._config
|
||||||
|
|
||||||
def rename_columns(self, dataframe):
|
def rename_columns(self, dataframe):
|
||||||
"""Rename dataframe column to match with `csv_fields` """
|
"""Rename dataframe columns from their CSV headers to the matching `csv_fields` keys
|
||||||
return dataframe.rename(columns=self._config["csv_fields"])
|
|
||||||
|
:param dataframe: the dataframe
|
||||||
|
|
||||||
|
:example:
|
||||||
|
>>> loader = CSVLoader()
|
||||||
|
>>>
|
||||||
|
|
||||||
|
"""
|
||||||
|
return dataframe.rename(
|
||||||
|
columns={v: k for k, v in self._config["csv_fields"].items()}
|
||||||
|
)
|
||||||
|
|
||||||
def reverse_csv_field(self, keys):
|
def reverse_csv_field(self, keys):
|
||||||
""" Reverse csv field from keys """
|
""" Reverse csv field from keys """
|
||||||
@ -76,11 +85,11 @@ class CSVLoader(Loader):
|
|||||||
>>> loader = CSVLoader("./test_config.yml")
|
>>> loader = CSVLoader("./test_config.yml")
|
||||||
>>> exams = loader.get_exams(["Tribe1"])
|
>>> exams = loader.get_exams(["Tribe1"])
|
||||||
>>> exams.columns
|
>>> exams.columns
|
||||||
Index(['Nom', 'Date', 'Trimestre', 'origin', 'tribe', 'id'], dtype='object')
|
Index(['name', 'date', 'term', 'origin', 'tribe', 'id'], dtype='object')
|
||||||
>>> exams.loc[:, exams.columns!="id"]
|
>>> exams
|
||||||
Nom Date Trimestre origin tribe
|
name date term origin tribe id
|
||||||
0 DS 12/01/2021 1 example/Tribe1/210112_DS.csv Tribe1
|
0 DS 12/01/2021 1 example/Tribe1/210112_DS.csv Tribe1 DS_Tribe1
|
||||||
0 DS6 22/01/2021 1 example/Tribe1/210122_DS6.csv Tribe1
|
0 DS6 22/01/2021 1 example/Tribe1/210122_DS6.csv Tribe1 DS6_Tribe1
|
||||||
"""
|
"""
|
||||||
exams = []
|
exams = []
|
||||||
for tribe in tribes:
|
for tribe in tribes:
|
||||||
@ -90,9 +99,12 @@ class CSVLoader(Loader):
|
|||||||
fields = self.reverse_csv_field(["exam", "date", "term"])
|
fields = self.reverse_csv_field(["exam", "date", "term"])
|
||||||
exam = extract_fields(csv, fields)
|
exam = extract_fields(csv, fields)
|
||||||
exam = self.rename_columns(exam)
|
exam = self.rename_columns(exam)
|
||||||
|
exam = exam.rename(columns={"exam": "name"})
|
||||||
exam["origin"] = csv
|
exam["origin"] = csv
|
||||||
exam["tribe"] = tribe
|
exam["tribe"] = tribe
|
||||||
exam["id"] = uuid.uuid4()
|
exam["id"] = build_id(
|
||||||
|
self._config["id_templates"]["exam"], exam.iloc[0]
|
||||||
|
)
|
||||||
exams.append(exam)
|
exams.append(exam)
|
||||||
return pd.concat(exams)
|
return pd.concat(exams)
|
||||||
|
|
||||||
@ -105,14 +117,22 @@ class CSVLoader(Loader):
|
|||||||
>>> loader = CSVLoader("./test_config.yml")
|
>>> loader = CSVLoader("./test_config.yml")
|
||||||
>>> exams = loader.get_exams(["Tribe1"])
|
>>> exams = loader.get_exams(["Tribe1"])
|
||||||
>>> loader.get_exam_questions([exams.iloc[0]]).columns
|
>>> loader.get_exam_questions([exams.iloc[0]]).columns
|
||||||
Index(['Exercice', 'Question', 'Competence', 'Domaine', 'Commentaire',
|
Index(['exercise', 'question', 'competence', 'theme', 'comment', 'score_rate',
|
||||||
'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'],
|
'is_leveled', 'origin', 'exam_id', 'id'],
|
||||||
dtype='object')
|
dtype='object')
|
||||||
>>> questions = loader.get_exam_questions(exams)
|
>>> questions = loader.get_exam_questions(exams)
|
||||||
>>> questions.columns
|
>>> questions.iloc[0]
|
||||||
Index(['Exercice', 'Question', 'Competence', 'Domaine', 'Commentaire',
|
exercise Exercice 1
|
||||||
'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'],
|
question 1
|
||||||
dtype='object')
|
competence Calculer
|
||||||
|
theme Plop
|
||||||
|
comment Coucou
|
||||||
|
score_rate 1.0
|
||||||
|
is_leveled 1.0
|
||||||
|
origin example/Tribe1/210112_DS.csv
|
||||||
|
exam_id DS_Tribe1
|
||||||
|
id DS_Tribe1_Exercice_1_1_Coucou
|
||||||
|
Name: 0, dtype: object
|
||||||
"""
|
"""
|
||||||
_exams = maybe_dataframe(exams)
|
_exams = maybe_dataframe(exams)
|
||||||
|
|
||||||
@ -133,7 +153,9 @@ class CSVLoader(Loader):
|
|||||||
question = self.rename_columns(question)
|
question = self.rename_columns(question)
|
||||||
question["origin"] = exam["origin"]
|
question["origin"] = exam["origin"]
|
||||||
question["exam_id"] = exam["id"]
|
question["exam_id"] = exam["id"]
|
||||||
question["id"] = uuid.uuid4()
|
question["id"] = build_id(
|
||||||
|
self._config["id_templates"]["question"], question.iloc[0]
|
||||||
|
)
|
||||||
questions.append(question)
|
questions.append(question)
|
||||||
|
|
||||||
return pd.concat(questions)
|
return pd.concat(questions)
|
||||||
@ -149,7 +171,10 @@ class CSVLoader(Loader):
|
|||||||
>>> questions = loader.get_exam_questions(exams)
|
>>> questions = loader.get_exam_questions(exams)
|
||||||
>>> scores = loader.get_questions_scores(questions)
|
>>> scores = loader.get_questions_scores(questions)
|
||||||
>>> scores.columns
|
>>> scores.columns
|
||||||
Index(['origin', 'question_id', 'student_name', 'score'], dtype='object')
|
Index(['term', 'exam', 'date', 'exercise', 'question', 'competence', 'theme',
|
||||||
|
'comment', 'score_rate', 'is_leveled', 'origin', 'exam_id',
|
||||||
|
'question_id', 'student_name', 'score'],
|
||||||
|
dtype='object')
|
||||||
>>> scores["student_name"].unique()
|
>>> scores["student_name"].unique()
|
||||||
array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle',
|
array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle',
|
||||||
'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves',
|
'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves',
|
||||||
@ -163,17 +188,15 @@ class CSVLoader(Loader):
|
|||||||
group_questions = questions.groupby("origin")
|
group_questions = questions.groupby("origin")
|
||||||
for origin, questions_df in group_questions:
|
for origin, questions_df in group_questions:
|
||||||
scores_df = pd.read_csv(origin)
|
scores_df = pd.read_csv(origin)
|
||||||
|
scores_df = self.rename_columns(scores_df)
|
||||||
student_names = [
|
student_names = [
|
||||||
c
|
c
|
||||||
for c in scores_df.columns
|
for c in scores_df.columns
|
||||||
if c not in self._config["csv_fields"].values()
|
if c not in self._config["csv_fields"].keys()
|
||||||
]
|
]
|
||||||
|
|
||||||
common_columns = [c for c in questions_df.columns if c in scores_df.columns]
|
common_columns = [c for c in questions_df.columns if c in scores_df.columns]
|
||||||
scores_df = pd.merge(scores_df, questions_df, on=common_columns)
|
scores_df = pd.merge(scores_df, questions_df, on=common_columns)
|
||||||
scores_df = scores_df.drop(
|
|
||||||
columns=list(self._config["csv_fields"].values()) + ["exam_id"]
|
|
||||||
)
|
|
||||||
|
|
||||||
kept_columns = [c for c in scores_df if c not in student_names]
|
kept_columns = [c for c in scores_df if c not in student_names]
|
||||||
scores_df = pd.melt(
|
scores_df = pd.melt(
|
||||||
@ -182,8 +205,9 @@ class CSVLoader(Loader):
|
|||||||
value_vars=student_names,
|
value_vars=student_names,
|
||||||
var_name="student_name",
|
var_name="student_name",
|
||||||
value_name="score",
|
value_name="score",
|
||||||
).rename(columns={"id": "question_id"})
|
)
|
||||||
|
|
||||||
|
scores_df = scores_df.rename(columns={"id": "question_id"})
|
||||||
scores.append(scores_df)
|
scores.append(scores_df)
|
||||||
|
|
||||||
return pd.concat(scores)
|
return pd.concat(scores)
|
||||||
@ -209,3 +233,7 @@ class CSVLoader(Loader):
|
|||||||
students.append(students_df)
|
students.append(students_df)
|
||||||
|
|
||||||
return pd.concat(students)
|
return pd.concat(students)
|
||||||
|
|
||||||
|
def get_student_scores(self, student=[]):
|
||||||
|
"""Get all scores for students"""
|
||||||
|
pass
|
Loading…
Reference in New Issue
Block a user