Feat: get_questions_scores written

2021-04-06 19:37:12 +02:00
parent 596a52b1d0
commit 94f94dae84
1 changed files with 47 additions and 7 deletions
--- a/recopytex/store/filesystem/loader.py
+++ b/recopytex/store/filesystem/loader.py
@@ -15,6 +15,13 @@ with open(DEFAULT_CONFIG_FILE, "r") as config:
    DEFAULT_CONFIG = yaml.load(config, Loader=yaml.FullLoader)


+def maybe_dataframe(datas):
+    try:
+        return [e[1] for e in datas.iterrows()]
+    except AttributeError:
+        return datas
+
+
 class CSVLoader(Loader):

    """Loader when scores and metadatas are stored in csv files
@@ -99,7 +106,7 @@ class CSVLoader(Loader):
    def get_exam_questions(self, exams=[]):
        """Get questions for exams stored in score_files

-        :param exams: list of exams metadatas (need origin field to find the csv)
+        :param exams: list or dataframe of exams metadatas (need origin field to find the csv)

        :example:
        >>> loader = CSVLoader("./test_config.yml")
@@ -114,10 +121,7 @@ class CSVLoader(Loader):
               'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'],
              dtype='object')
        """
-        try:
-            _exams = [e[1] for e in exams.iterrows()]
-        except AttributeError:
-            _exams = exams
+        _exams = maybe_dataframe(exams)

        questions = []
        for exam in _exams:
@@ -144,9 +148,45 @@ class CSVLoader(Loader):
    def get_questions_scores(self, questions=[]):
        """Get scores of those questions

-        :param questions: score for those questions
+        :param questions: dataframe of questions metadatas (a plain list is not supported here; needs origin field to find the csv)
+
+        :example:
+        >>> loader = CSVLoader("./test_config.yml")
+        >>> exams = loader.get_exams(["Tribe1"])
+        >>> questions = loader.get_exam_questions(exams)
+        >>> scores = loader.get_questions_scores(questions)
+        >>> scores.columns
+        Index(['id', 'origin', 'student_name', 'score'], dtype='object')
+        >>> scores["student_name"].unique()
+        array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle',
+               'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves',
+               'Erick Going', 'Ase Praton', 'Rollins Planks', 'Dunstan Sarjant',
+               'Stacy Guiton', 'Ange Stanes', 'Amabelle Elleton',
+               'Darn Broomhall', 'Dyan Chatto', 'Keane Rennebach', 'Nari Paulton',
+               'Brandy Wase', 'Jaclyn Firidolfi', 'Violette Lockney'],
+              dtype=object)
        """
-        return ""
+        scores = []
+        group_questions = questions.groupby("origin")
+        for origin, questions_df in group_questions:
+            scores_df = pd.read_csv(origin)
+            common_columns = [c for c in questions_df.columns if c in scores_df.columns]
+            scores_df = pd.merge(scores_df, questions_df, on=common_columns)
+            scores_df = scores_df.drop(
+                columns=list(self._config["csv_fields"].values()) + ["exam_id"]
+            )
+            kept_columns = ["id", "origin"]
+            student_names = [c for c in scores_df.columns if c not in kept_columns]
+            scores_df = pd.melt(
+                scores_df,
+                id_vars=kept_columns,
+                value_vars=student_names,
+                var_name="student_name",
+                value_name="score",
+            )
+            scores.append(scores_df)
+
+        return pd.concat(scores)

    def get_student_scores(self, student):
        """Get scores of the student