diff --git a/recopytex/store/filesystem/loader.py b/recopytex/store/filesystem/loader.py
index c2eab10..d6dfdab 100644
--- a/recopytex/store/filesystem/loader.py
+++ b/recopytex/store/filesystem/loader.py
@@ -15,6 +15,21 @@ with open(DEFAULT_CONFIG_FILE, "r") as config:
     DEFAULT_CONFIG = yaml.load(config, Loader=yaml.FullLoader)
 
 
+def maybe_dataframe(datas):
+    """Return the rows of a dataframe as a list, anything else unchanged
+
+    :param datas: dataframe (any object with ``iterrows``) or a list of rows
+    :return: list holding one row (Series) per dataframe line, or ``datas``
+        itself when it has no ``iterrows`` method
+    """
+    try:
+        rows = datas.iterrows()
+    except AttributeError:
+        # Not a dataframe: hand it back untouched
+        return datas
+    return [row for _, row in rows]
+
+
 class CSVLoader(Loader):
     """Loader when scores and metadatas are stored in csv files
 
@@ -99,7 +114,7 @@ class CSVLoader(Loader):
     def get_exam_questions(self, exams=[]):
         """Get questions for exams stored in score_files
 
-        :param exams: list of exams metadatas (need origin field to find the csv)
+        :param exams: list or dataframe of exams metadatas (need origin field to find the csv)
 
         :example:
             >>> loader = CSVLoader("./test_config.yml")
@@ -114,10 +129,7 @@ class CSVLoader(Loader):
                    'Bareme', 'Est_nivele', 'origin', 'exam_id', 'id'],
                   dtype='object')
         """
-        try:
-            _exams = [e[1] for e in exams.iterrows()]
-        except AttributeError:
-            _exams = exams
+        _exams = maybe_dataframe(exams)
 
         questions = []
         for exam in _exams:
@@ -144,9 +156,61 @@ class CSVLoader(Loader):
     def get_questions_scores(self, questions=[]):
         """Get scores of those questions
 
-        :param questions: score for those questions
+        :param questions: list or dataframe of questions metadatas (need origin field to find the csv)
+        :return: dataframe in long format with the columns id, origin,
+            student_name and score
+
+        :example:
+            >>> loader = CSVLoader("./test_config.yml")
+            >>> exams = loader.get_exams(["Tribe1"])
+            >>> questions = loader.get_exam_questions(exams)
+            >>> scores = loader.get_questions_scores(questions)
+            >>> scores.columns
+            Index(['id', 'origin', 'student_name', 'score'], dtype='object')
+            >>> scores["student_name"].unique()
+            array(['Star Tice', 'Umberto Dingate', 'Starlin Crangle',
+                   'Humbert Bourcq', 'Gabriella Handyside', 'Stewart Eaves',
+                   'Erick Going', 'Ase Praton', 'Rollins Planks', 'Dunstan Sarjant',
+                   'Stacy Guiton', 'Ange Stanes', 'Amabelle Elleton',
+                   'Darn Broomhall', 'Dyan Chatto', 'Keane Rennebach', 'Nari Paulton',
+                   'Brandy Wase', 'Jaclyn Firidolfi', 'Violette Lockney'],
+                  dtype=object)
         """
-        return ""
+        kept_columns = ["id", "origin"]
+
+        questions_df = questions
+        if not isinstance(questions_df, pd.DataFrame):
+            # A list of rows (the default is ``[]``) has no ``groupby``:
+            # build the dataframe the rest of the method relies on.
+            questions_df = pd.DataFrame(list(questions)).infer_objects()
+        if questions_df.empty:
+            # ``pd.concat`` raises on an empty list; return an empty result
+            # with the documented columns instead.
+            return pd.DataFrame(columns=kept_columns + ["student_name", "score"])
+
+        # Metadata columns are the same for every origin, compute them once.
+        dropped_columns = list(self._config["csv_fields"].values()) + ["exam_id"]
+        scores = []
+        for origin, origin_questions in questions_df.groupby("origin"):
+            scores_df = pd.read_csv(origin)
+            common_columns = [
+                c for c in origin_questions.columns if c in scores_df.columns
+            ]
+            scores_df = pd.merge(scores_df, origin_questions, on=common_columns)
+            scores_df = scores_df.drop(columns=dropped_columns)
+            # Every remaining column is treated as one student's scores.
+            student_names = [c for c in scores_df.columns if c not in kept_columns]
+            scores.append(
+                pd.melt(
+                    scores_df,
+                    id_vars=kept_columns,
+                    value_vars=student_names,
+                    var_name="student_name",
+                    value_name="score",
+                )
+            )
+
+        return pd.concat(scores)
 
     def get_student_scores(self, student):
         """Get scores of the student