diff --git a/recopytex/store/filesystem/loader.py b/recopytex/store/filesystem/loader.py index d6dfdab..b86fff7 100644 --- a/recopytex/store/filesystem/loader.py +++ b/recopytex/store/filesystem/loader.py @@ -156,7 +156,7 @@ class CSVLoader(Loader): >>> questions = loader.get_exam_questions(exams) >>> scores = loader.get_questions_scores(questions) >>> scores.columns - Index(['id', 'origin', 'student_name', 'score'], dtype='object') + Index(['origin', 'question_id', 'student_name', 'score'], dtype='object') >>> scores["student_name"].unique() array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle', 'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves', @@ -170,20 +170,27 @@ class CSVLoader(Loader): group_questions = questions.groupby("origin") for origin, questions_df in group_questions: scores_df = pd.read_csv(origin) + student_names = [ + c + for c in scores_df.columns + if c not in self._config["csv_fields"].values() + ] + common_columns = [c for c in questions_df.columns if c in scores_df.columns] scores_df = pd.merge(scores_df, questions_df, on=common_columns) scores_df = scores_df.drop( columns=list(self._config["csv_fields"].values()) + ["exam_id"] ) - kept_columns = ["id", "origin"] - student_names = [c for c in scores_df.columns if c not in kept_columns] + + kept_columns = [c for c in scores_df if c not in student_names] scores_df = pd.melt( scores_df, id_vars=kept_columns, value_vars=student_names, var_name="student_name", value_name="score", - ) + ).rename(columns={"id": "question_id"}) + scores.append(scores_df) return pd.concat(scores)