#!/usr/bin/env python
# encoding: utf-8

from math import ceil

import numpy as np
import pandas as pd


# Values manipulations

def round_half_point(val):
    """ Round a value up to the next half point.

    :param val: the number to round
    :returns: the smallest multiple of 0.5 that is not below val; val itself
        is returned untouched when it cannot be rounded (NaN, infinity or a
        non numeric value)

    >>> round_half_point(1.2)
    1.5
    >>> round_half_point(2)
    2.0
    >>> round_half_point("abs")
    'abs'
    """
    try:
        return 0.5 * ceil(2.0 * val)
    except (ValueError, TypeError, OverflowError):
        # ValueError: NaN, TypeError: not a number, OverflowError: infinity
        return val


latex_caract = ["\\NoRep", "\\RepZ", "\\RepU", "\\RepD", "\\RepT"]


def note_to_rep(x):
    r""" Transform a Note to the latex character

    Only "Niveau" rows are translated: a missing note gives the first symbol
    of latex_caract and a note equal to 0, 1, 2 or 3 gives the matching
    symbol. Any other value is returned unchanged.

    :param x: dictionary with "Niveau" and "Note" keys

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ... "Trimestre": ["1"]*12,
    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ... "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ... }
    >>> df = pd.DataFrame(d)
    >>> note_to_rep(df.loc[0])
    1.0
    >>> note_to_rep(df.loc[4])
    '\\RepU'
    """
    if x["Niveau"]:
        if pd.isnull(x["Note"]):
            return latex_caract[0]
        elif x["Note"] in range(4):
            # Shifted by one because index 0 is the "no answer" symbol
            return latex_caract[int(x["Note"]) + 1]
    return x["Note"]


def note_to_mark(x):
    """ Compute the mark when it is a "Niveau" note

    A "Niveau" note is scaled as Note * Bareme / 3, other notes are returned
    as they are.

    :param x: dictionary with "Niveau", "Note" and "Bareme" keys

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ... "Trimestre": ["1"]*12,
    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ... "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ... }
    >>> df = pd.DataFrame(d)
    >>> note_to_mark(df.loc[0])
    1.0
    >>> note_to_mark(df.loc[10])
    1.3333333333333333
    """
    if x["Niveau"]:
        return x["Note"] * x["Bareme"] / 3
    return x["Note"]


def question_uniq_formater(row):
    """ Create a kind of unique description of the question

    Numeric "Exercice" / "Question" values are prefixed with "Exo" / "Qu",
    other values are used as they are. A null "Question" is left out and a
    non null "Commentaire" (when the key exists) is appended in parentheses.

    :param row: dictionary with "Exercice" and "Question" keys and an
        optional "Commentaire" key

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ... "Trimestre": ["1"]*12,
    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ... "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ... }
    >>> df = pd.DataFrame(d)
    >>> question_uniq_formater(df.loc[0])
    'Ex1 Q1'
    >>> question_uniq_formater(df.loc[10])
    'Ex1 Q1'
    """
    ans = ""

    try:
        int(row["Exercice"])
    except (ValueError, TypeError):
        # Not a number (a label, NaN or None): keep the value as it is
        ans += str(row["Exercice"])
    else:
        ans += "Exo" + str(row["Exercice"])

    ans += " "

    try:
        int(row["Question"])
    except (ValueError, TypeError):
        # TypeError is raised by None, which counts as "no question" too
        if not pd.isnull(row["Question"]):
            ans += str(row["Question"])
    else:
        ans += "Qu" + str(row["Question"])

    try:
        row["Commentaire"]
    except KeyError:
        # The comment column is optional
        pass
    else:
        if not pd.isnull(row["Commentaire"]):
            ans += " ({})".format(row["Commentaire"])

    return ans


# DataFrame columns manipulations

def compute_marks(df):
    """ Compute the Mark column of df

    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns.
    :returns: Series with note_to_mark applied to every row

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ... "Trimestre": ["1"]*12,
    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ... "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ... }
    >>> df = pd.DataFrame(d)
    >>> compute_marks(df)
    0     1.000000
    1     0.330000
    2     2.000000
    3     1.500000
    4     0.666667
    5     2.000000
    6     0.666000
    7     1.000000
    8     1.500000
    9     1.000000
    10    1.333333
    11    2.000000
    dtype: float64
    """
    return df[["Note", "Niveau", "Bareme"]].apply(note_to_mark, axis=1)


def compute_latex_rep(df):
    r""" Compute the Latex_rep column of df

    :param df: DataFrame with "Note" and "Niveau" columns.
    :returns: Series with note_to_rep applied to every row, remaining null
        values being replaced by "??"

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ... "Trimestre": ["1"]*12,
    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ... "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ... }
    >>> df = pd.DataFrame(d)
    >>> compute_latex_rep(df)
    0         1
    1      0.33
    2         2
    3       1.5
    4     \RepU
    5     \RepT
    6     0.666
    7         1
    8       1.5
    9         1
    10    \RepD
    11    \RepT
    dtype: object
    """
    return df[["Note", "Niveau"]].apply(note_to_rep, axis=1).fillna("??")


def compute_normalized(df):
    """ Compute the normalized mark (Mark / Bareme)

    :param df: DataFrame with "Mark" and "Bareme" columns
    :returns: Series of Mark divided by Bareme

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ... "Trimestre": ["1"]*12,
    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ... "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ... }
    >>> df = pd.DataFrame(d)
    >>> df["Mark"] = compute_marks(df)
    >>> compute_normalized(df)
    0     1.000000
    1     0.330000
    2     1.000000
    3     0.750000
    4     0.333333
    5     1.000000
    6     0.666000
    7     1.000000
    8     0.750000
    9     0.500000
    10    0.666667
    11    1.000000
    dtype: float64
    """
    return df["Mark"] / df["Bareme"]


def compute_question_description(df):
    """ Compute the unique description of a question

    :param df: DataFrame with "Exercice" and "Question" columns and an
        optional "Commentaire" column
    :returns: Series with question_uniq_formater applied to every row
    """
    return df.apply(question_uniq_formater, axis=1)


# Computing custom values

def _nan_aware_sum(values):
    """ Sum values ignoring NaN, the result staying NaN when nothing is left

    A plain sum gives 0 for a group holding only NaN, which would turn a
    missing mark into a zero.
    """
    return values.sum(min_count=1)


def compute_exo_marks(df):
    """ Compute Exercice level marks

    "Bareme" and "Mark" are summed for every (Eleve, Nom, Exercice, Date,
    Trimestre) group and rounded up to the next half point. An exercise
    without any mark keeps a NaN mark.

    :param df: the original marks
    :returns: DataFrame with computed marks, "Question" set to "Total" and
        "Niveau" set to 0

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ... "Trimestre": ["1"]*12,
    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ... "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ... }
    >>> df = pd.DataFrame(d)
    >>> df["Mark"] = compute_marks(df)
    >>> compute_exo_marks(df)
      Eleve Nom Exercice        Date Trimestre  Bareme  Mark Question  Niveau
    0    E1  N1      Ex1  16/09/2016         1     2.0   1.5    Total       0
    1    E1  N1      Ex2  16/09/2016         1     4.0   3.5    Total       0
    2    E1  N2      Ex1  01/10/2016         1     2.0   1.0    Total       0
    3    E1  N2      Ex2  01/10/2016         1     2.0   2.0    Total       0
    4    E2  N1      Ex1  16/09/2016         1     2.0   2.0    Total       0
    5    E2  N1      Ex2  16/09/2016         1     4.0   2.5    Total       0
    6    E2  N2      Ex1  01/10/2016         1     2.0   1.5    Total       0
    7    E2  N2      Ex2  01/10/2016         1     2.0   2.0    Total       0
    """
    exo_pt = pd.pivot_table(
        df,
        index=["Eleve", "Nom", "Exercice", "Date", "Trimestre"],
        values=["Bareme", "Mark"],
        aggfunc=_nan_aware_sum,
    )
    # Element-wise rounding, column by column (DataFrame.applymap is
    # deprecated in recent pandas)
    exo_pt = exo_pt.apply(lambda column: column.map(round_half_point))

    exo = exo_pt.reset_index()
    exo["Question"] = "Total"
    exo["Niveau"] = 0
    return exo


def compute_eval_marks(df):
    """ Compute Nom level marks from the dataframe using only row with Total in Question

    "Bareme" and "Mark" are summed for every (Eleve, Nom, Trimestre) group,
    one evaluation ("Nom") at a time. When an evaluation spreads over several
    dates, rows without mark are dropped first and "Date" becomes
    "Trimestre".

    :param df: DataFrame with value Total in Question column
    :returns: DataFrame with evaluation marks

    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ... "Trimestre": ["1"]*12,
    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ... "Note":[1, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
    ... }
    >>> df = pd.DataFrame(d)
    >>> df["Mark"] = compute_marks(df)
    >>> df_exo = compute_exo_marks(df)
    >>> compute_eval_marks(df_exo)
       index Eleve Nom Trimestre  Bareme        Date  Mark
    0      0    E1  N1         1     6.0  16/09/2016   5.0
    1      1    E2  N1         1     6.0  16/09/2016   4.5
    2      0    E1  N2         1     4.0  01/10/2016   3.0
    3      1    E2  N2         1     4.0  01/10/2016   3.5
    """
    frames = []
    for eval_name in df["Nom"].unique():
        eval_df = df[df["Nom"] == eval_name]
        dates = eval_df["Date"].unique()
        several_dates = len(dates) > 1
        if several_dates:
            # Evaluations spread over time: NaN are not penalizing, so the
            # corresponding rows are removed
            eval_df = eval_df.dropna(subset=["Mark"])
        if eval_df.empty:
            # Nothing to aggregate (e.g. a NaN "Nom" never equals itself)
            continue
        date_label = "Trimestre" if several_dates else dates[0]

        eval_pt = pd.pivot_table(
            eval_df,
            index=["Eleve", "Nom", "Trimestre"],
            values=["Bareme", "Mark", "Date"],
            aggfunc={
                "Bareme": _nan_aware_sum,
                "Mark": _nan_aware_sum,
                # Every row of the evaluation gets the same date label
                "Date": lambda _dates, label=date_label: label,
            },
        )
        frames.append(eval_pt.reset_index())

    # Concatenate once instead of growing a DataFrame inside the loop
    eval_m = pd.concat(frames) if frames else pd.DataFrame()
    # The previous per evaluation index is kept as an "index" column
    return eval_m.reset_index()


def digest_flat_df(flat_df):
    r""" Compute necessary element to make a flat df usable for analysis.

    :param flat_df: DataFrame with "Eleve", "Nom", "Exercice", "Question",
        "Date", "Trimestre", "Bareme", "Niveau" and "Note" columns
    :returns: tuple of 3 DataFrames: a copy of flat_df with "Mark",
        "Latex_rep", "Normalized" and "Uniq_quest" columns added, the
        exercise level marks and the evaluation level marks (the last two
        with a "Normalized" column)

    >>> from numpy import nan
    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
    ... "Trimestre": ["1"]*12,
    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
    ... "Note":[1, 0.33, 2, 1.5, 1, 3, nan, 0, 0, nan, nan, nan],
    ... }
    >>> df = pd.DataFrame(d)
    >>> quest_df, exo_df, eval_df = digest_flat_df(df)
    >>> quest_df[['Eleve', "Nom", "Mark", "Latex_rep", "Normalized", "Uniq_quest"]]
       Eleve Nom      Mark Latex_rep  Normalized Uniq_quest
    0     E1  N1  1.000000         1    1.000000     Ex1 Q1
    1     E1  N1  0.330000      0.33    0.330000     Ex1 Q2
    2     E1  N1  2.000000         2    1.000000     Ex2 Q1
    3     E1  N1  1.500000       1.5    0.750000     Ex2 Q2
    4     E1  N2  0.666667     \RepU    0.333333     Ex1 Q1
    5     E1  N2  2.000000     \RepT    1.000000     Ex2 Q1
    6     E2  N1       NaN        ??         NaN     Ex1 Q1
    7     E2  N1  0.000000         0    0.000000     Ex1 Q2
    8     E2  N1  0.000000         0    0.000000     Ex2 Q1
    9     E2  N1       NaN        ??         NaN     Ex2 Q2
    10    E2  N2       NaN    \NoRep         NaN     Ex1 Q1
    11    E2  N2       NaN    \NoRep         NaN     Ex2 Q1
    >>> exo_df[['Eleve', "Nom", "Exercice", "Mark", "Normalized"]]
      Eleve Nom Exercice  Mark  Normalized
    0    E1  N1      Ex1   1.5       0.750
    1    E1  N1      Ex2   3.5       0.875
    2    E1  N2      Ex1   1.0       0.500
    3    E1  N2      Ex2   2.0       1.000
    4    E2  N1      Ex1   0.0       0.000
    5    E2  N1      Ex2   0.0       0.000
    6    E2  N2      Ex1   NaN         NaN
    7    E2  N2      Ex2   NaN         NaN
    >>> eval_df
       index Eleve Nom Trimestre  Bareme        Date  Mark  Normalized
    0      0    E1  N1         1     6.0  16/09/2016   5.0    0.833333
    1      1    E2  N1         1     6.0  16/09/2016   0.0    0.000000
    2      0    E1  N2         1     4.0  01/10/2016   3.0    0.750000
    3      1    E2  N2         1     4.0  01/10/2016   NaN         NaN
    """
    # Work on a copy so that the caller's DataFrame is left untouched
    df = flat_df.copy()
    df["Mark"] = compute_marks(flat_df)
    df["Latex_rep"] = compute_latex_rep(flat_df)
    df["Normalized"] = compute_normalized(df)
    df["Uniq_quest"] = compute_question_description(df)

    exo_df = compute_exo_marks(df)
    exo_df["Normalized"] = compute_normalized(exo_df)

    # Evaluation marks are built from the (rounded) exercise totals
    eval_df = compute_eval_marks(exo_df)
    eval_df["Normalized"] = compute_normalized(eval_df)

    return df, exo_df, eval_df


# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del