diff --git a/recopytex/store/__init__.py b/recopytex/database/__init__.py similarity index 90% rename from recopytex/store/__init__.py rename to recopytex/database/__init__.py index b9ed17a..401f663 100644 --- a/recopytex/store/__init__.py +++ b/recopytex/database/__init__.py @@ -71,13 +71,13 @@ class Loader(ABC): """ pass - @abstractmethod - def get_student_scores(self, student): - """Get scores of the student + # @abstractmethod + # def get_student_scores(self, student): + # """Get scores of the student - :param student: - """ - pass + # :param student: + # """ + # pass class Writer(ABC): diff --git a/recopytex/store/filesystem/__init__.py b/recopytex/database/filesystem/__init__.py similarity index 100% rename from recopytex/store/filesystem/__init__.py rename to recopytex/database/filesystem/__init__.py diff --git a/recopytex/store/filesystem/default_config.yml b/recopytex/database/filesystem/default_config.yml similarity index 90% rename from recopytex/store/filesystem/default_config.yml rename to recopytex/database/filesystem/default_config.yml index 54c7787..f960f30 100644 --- a/recopytex/store/filesystem/default_config.yml +++ b/recopytex/database/filesystem/default_config.yml @@ -42,3 +42,6 @@ csv_fields: # dataframe_field: csv_field score_rate: Bareme is_leveled: Est_nivele +id_templates: + exam: "{name}_{tribe}" + question: "{exam_id}_{exercise}_{question}_{comment}" diff --git a/recopytex/store/filesystem/lib.py b/recopytex/database/filesystem/lib.py similarity index 77% rename from recopytex/store/filesystem/lib.py rename to recopytex/database/filesystem/lib.py index 079e7db..5457574 100644 --- a/recopytex/store/filesystem/lib.py +++ b/recopytex/database/filesystem/lib.py @@ -3,6 +3,7 @@ import pandas as pd from pathlib import Path +from unidecode import unidecode __all__ = ["list_csvs", "extract_fields"] @@ -38,3 +39,14 @@ def extract_fields(csv_filename, fields=[], remove_duplicates=True): if remove_duplicates: return df.drop_duplicates() return df + 
def build_id(template, element):
    """Return an identifier made by filling ``template`` with ``element``.

    The template is rendered with ``str.format``, the entries of ``element``
    being passed as keyword arguments. The rendered text then goes through
    ``unidecode`` (accented characters lose their accents, as in the example
    below) and every space is replaced by an underscore.

    :param template: ``str.format``-style template such as ``"{name}_{tribe}"``
    :param element: mapping providing the fields named in the template;
        entries the template does not use are ignored
    :return: the rendered identifier as a string

    :example:
    >>> element = {"name": "pléà", "place": "here", "foo":"bar"}
    >>> build_id("{name} {place}", element)
    'plea_here'
    """
    rendered = template.format(**element)
    without_accents = unidecode(rendered)
    return without_accents.replace(" ", "_")
'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'id_templates': {'exam': '{name}_{tribe}', 'question': '{exam_id}_{exercise}_{question}_{comment}'}} >>> loader = CSVLoader("./test_config.yml") >>> loader.get_config() - {'source': './example', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 'Exercice', 'question': 'Question', 'competence': 'Competence', 'theme': 'Domaine', 'comment': 'Commentaire', 'score_rate': 'Bareme', 'is_leveled': 'Est_nivele'}, 'output': './output', 'templates': 'templates/', 'tribes': {'Tribe1': {'name': 'Tribe1', 'type': 'Type1', 'students': 'tribe1.csv'}, 'Tribe2': {'name': 'Tribe2', 'students': 'tribe2.csv'}}} - + {'source': './example', 'competences': {'Chercher': {'name': 'Chercher', 'abrv': 'Cher'}, 'Représenter': {'name': 'Représenter', 'abrv': 'Rep'}, 'Modéliser': {'name': 'Modéliser', 'abrv': 'Mod'}, 'Raisonner': {'name': 'Raisonner', 'abrv': 'Rai'}, 'Calculer': {'name': 'Calculer', 'abrv': 'Cal'}, 'Communiquer': {'name': 'Communiquer', 'abrv': 'Com'}}, 'valid_scores': {'BAD': 0, 'FEW': 1, 'NEARLY': 2, 'GOOD': 3, 'NOTFILLED': None, 'NOANSWER': '.', 'ABS': 'a'}, 'csv_fields': {'term': 'Trimestre', 'exam': 'Nom', 'date': 'Date', 'exercise': 
def rename_columns(self, dataframe):
    """Translate CSV column headers into the internal field names.

    The ``csv_fields`` section of the configuration maps each internal
    field name to the header used in the CSV files; this method applies the
    inverse of that mapping. Columns whose name is not one of the configured
    CSV headers keep their name.

    :param dataframe: dataframe whose columns use the CSV headers
    :return: the dataframe returned by ``DataFrame.rename``

    :example:
    >>> loader = CSVLoader()
    >>> df = pd.DataFrame({"Nom": ["DS"], "Date": ["12/01/2021"]})
    >>> list(loader.rename_columns(df).columns)
    ['exam', 'date']
    """
    csv_fields = self._config["csv_fields"]
    # Invert field -> header into header -> field.
    header_to_field = dict(zip(csv_fields.values(), csv_fields.keys()))
    return dataframe.rename(columns=header_to_field)
uuid.uuid4() + exam["id"] = build_id( + self._config["id_templates"]["exam"], exam.iloc[0] + ) exams.append(exam) return pd.concat(exams) @@ -105,14 +117,22 @@ class CSVLoader(Loader): >>> loader = CSVLoader("./test_config.yml") >>> exams = loader.get_exams(["Tribe1"]) >>> loader.get_exam_questions([exams.iloc[0]]).columns - Index(['Exercice', 'Question', 'Competence', 'Domaine', 'Commentaire', - 'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'], + Index(['exercise', 'question', 'competence', 'theme', 'comment', 'score_rate', + 'is_leveled', 'origin', 'exam_id', 'id'], dtype='object') >>> questions = loader.get_exam_questions(exams) - >>> questions.columns - Index(['Exercice', 'Question', 'Competence', 'Domaine', 'Commentaire', - 'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'], - dtype='object') + >>> questions.iloc[0] + exercise Exercice 1 + question 1 + competence Calculer + theme Plop + comment Coucou + score_rate 1.0 + is_leveled 1.0 + origin example/Tribe1/210112_DS.csv + exam_id DS_Tribe1 + id DS_Tribe1_Exercice_1_1_Coucou + Name: 0, dtype: object """ _exams = maybe_dataframe(exams) @@ -133,7 +153,9 @@ class CSVLoader(Loader): question = self.rename_columns(question) question["origin"] = exam["origin"] question["exam_id"] = exam["id"] - question["id"] = uuid.uuid4() + question["id"] = build_id( + self._config["id_templates"]["question"], question.iloc[0] + ) questions.append(question) return pd.concat(questions) @@ -149,7 +171,10 @@ class CSVLoader(Loader): >>> questions = loader.get_exam_questions(exams) >>> scores = loader.get_questions_scores(questions) >>> scores.columns - Index(['origin', 'question_id', 'student_name', 'score'], dtype='object') + Index(['term', 'exam', 'date', 'exercise', 'question', 'competence', 'theme', + 'comment', 'score_rate', 'is_leveled', 'origin', 'exam_id', + 'question_id', 'student_name', 'score'], + dtype='object') >>> scores["student_name"].unique() array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle', 'Humbert 
def get_student_scores(self, student=None):
    """Get all scores for students.

    Placeholder: the lookup is not implemented yet, so nothing is read and
    the method always returns ``None``.

    :param student: currently unused. Presumably the student(s) whose
        scores are wanted -- to be confirmed when the method is implemented.
        The default is ``None`` rather than a list literal so that no
        mutable object is shared between calls.
    :return: ``None``
    """
    return None