#!/usr/bin/env python
# encoding: utf-8
"""Helpers turning a flat table of notes into marks, levels and totals.

The flat table is a DataFrame with (at least) the columns "Eleve", "Nom",
"Exercice", "Question", "Date", "Trimestre", "Bareme", "Niveau" and "Note".
"""

import logging
from math import ceil, floor

import numpy as np
import pandas as pd

logger = logging.getLogger(__name__)

# Highest value of a "Niveau" note: levels are 0, 1, ..., _MAX_LEVEL.
_MAX_LEVEL = 3

# Values manipulations


def round_half_point(val):
    """Round ``val`` up to the next multiple of 0.5.

    Values that cannot be rounded (``ceil`` raising ``ValueError``, e.g. for
    NaN, or ``TypeError``, e.g. for a string) are returned unchanged.

    >>> round_half_point(1.2)
    1.5
    >>> round_half_point(2)
    2.0
    >>> round_half_point("abc")
    'abc'
    """
    try:
        return 0.5 * ceil(2.0 * val)
    except (ValueError, TypeError):
        return val


# LaTeX macros: index 0 is used for a missing note, index ``level + 1`` for
# the levels 0 to 3.
latex_caract = ["\\NoRep", "\\RepZ", "\\RepU", "\\RepD", "\\RepT"]


def note_to_rep(x):
    r"""Transform a note into its LaTeX representation.

    :param x: mapping with "Niveau" and "Note" keys
    :returns: when "Niveau" is truthy, the LaTeX macro matching the note
        (``\NoRep`` for a null note, one of the four level macros for a note
        equal to 0, 1, 2 or 3); in every other case the note itself.

    >>> note_to_rep({"Niveau": 0, "Note": 1.5})
    1.5
    >>> note_to_rep({"Niveau": 1, "Note": 1})
    '\\RepU'
    >>> note_to_rep({"Niveau": 1, "Note": float("nan")})
    '\\NoRep'
    """
    if x["Niveau"]:
        note = x["Note"]
        if pd.isnull(note):
            return latex_caract[0]
        if note in range(_MAX_LEVEL + 1):
            return latex_caract[int(note) + 1]
    return x["Note"]


def note_to_mark(x):
    """Compute the mark corresponding to a note.

    :param x: mapping with "Niveau", "Note" and "Bareme" keys
    :returns: ``Note * Bareme / 3`` when "Niveau" is truthy (the note is a
        level between 0 and 3), otherwise the note itself.

    >>> note_to_mark({"Niveau": 0, "Note": 1.5, "Bareme": 2})
    1.5
    >>> note_to_mark({"Niveau": 1, "Note": 3, "Bareme": 2})
    2.0
    """
    if x["Niveau"]:
        return x["Note"] * x["Bareme"] / _MAX_LEVEL
    return x["Note"]


def note_to_level(x):
    """Compute the level ("na", 0, 1, 2, 3); "na" stands for "no answer".

    :param x: mapping with "Niveau", "Note" and "Bareme" keys
    :returns: "na" for a null note; the note truncated to ``int`` when
        "Niveau" is truthy; otherwise ``Note / Bareme * 3`` rounded up.

    >>> note_to_level({"Niveau": 0, "Note": 0.33, "Bareme": 1})
    1
    >>> note_to_level({"Niveau": 1, "Note": 2, "Bareme": 2})
    2
    >>> note_to_level({"Niveau": 0, "Note": float("nan"), "Bareme": 2})
    'na'
    """
    if pd.isnull(x["Note"]):
        return "na"
    if x["Niveau"]:
        return int(x["Note"])
    return int(ceil(x["Note"] / x["Bareme"] * _MAX_LEVEL))


def question_uniq_formater(row):
    """Create a kind of unique description of the question.

    The description is ``"<exercice> <question>"``. A value that converts to
    ``int`` is prefixed ("Exo" for the exercice, "Qu" for the question); a
    null question is left out. When the row has a non-null "Commentaire" it
    is appended between parentheses.

    :param row: mapping with "Exercice" and "Question" keys and an optional
        "Commentaire" key

    >>> question_uniq_formater({"Exercice": "Ex1", "Question": "Q1"})
    'Ex1 Q1'
    >>> question_uniq_formater(
    ...     {"Exercice": 2, "Question": 3, "Commentaire": "bonus"})
    'Exo2 Qu3 (bonus)'
    """
    exercice = row["Exercice"]
    try:
        int(exercice)
    except (ValueError, TypeError):
        # TypeError covers a missing value stored as None instead of NaN.
        ans = str(exercice)
    else:
        ans = "Exo" + str(exercice)

    ans += " "

    question = row["Question"]
    try:
        int(question)
    except (ValueError, TypeError):
        if not pd.isnull(question):
            ans += str(question)
    else:
        ans += "Qu" + str(question)

    try:
        comment = row["Commentaire"]
    except KeyError:
        # The "Commentaire" column is optional.
        pass
    else:
        if not pd.isnull(comment):
            ans += " ({})".format(comment)
    return ans


# DataFrame columns manipulations


def compute_marks(df):
    """Compute the marks of every row of ``df``.

    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns.
    :returns: Series obtained by applying :func:`note_to_mark` to each row

    >>> df = pd.DataFrame({"Note": [1.5, 3], "Niveau": [0, 1],
    ...                    "Bareme": [2, 2]})
    >>> compute_marks(df).tolist()
    [1.5, 2.0]
    """
    return df[["Note", "Niveau", "Bareme"]].apply(note_to_mark, axis=1)


def compute_level(df):
    """Compute the level of every row of ``df``.

    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns.
    :returns: Series obtained by applying :func:`note_to_level` to each row

    >>> df = pd.DataFrame({"Note": [np.nan, 1.5, 2], "Niveau": [0, 0, 1],
    ...                    "Bareme": [2, 2, 2]})
    >>> compute_level(df).tolist()
    ['na', 3, 2]
    """
    return df[["Note", "Niveau", "Bareme"]].apply(note_to_level, axis=1)


def compute_latex_rep(df):
    r"""Compute the LaTeX representation of every row of ``df``.

    :param df: DataFrame with "Note" and "Niveau" columns.
    :returns: Series obtained by applying :func:`note_to_rep` to each row,
        null results being replaced with "??"

    >>> df = pd.DataFrame({"Note": [1.5, np.nan, 2, np.nan],
    ...                    "Niveau": [0, 0, 1, 1]})
    >>> compute_latex_rep(df).tolist()
    [1.5, '??', '\\RepD', '\\NoRep']
    """
    return df[["Note", "Niveau"]].apply(note_to_rep, axis=1).fillna("??")


def compute_normalized(df):
    """Compute the normalized mark (Mark / Bareme).

    :param df: DataFrame with "Mark" and "Bareme" columns

    >>> df = pd.DataFrame({"Mark": [1.5, 2.0], "Bareme": [2, 4]})
    >>> compute_normalized(df).tolist()
    [0.75, 0.5]
    """
    return df["Mark"] / df["Bareme"]


def compute_question_description(df):
    """Compute the unique description of the question of every row.

    :param df: DataFrame with "Exercice" and "Question" columns (and an
        optional "Commentaire" column)
    :returns: Series obtained by applying :func:`question_uniq_formater` to
        each row
    """
    return df.apply(question_uniq_formater, axis=1)


# Computing custom values


def compute_exo_marks(df):
    """Compute Exercice level marks.

    "Bareme" and "Mark" are summed for every ("Eleve", "Nom", "Exercice",
    "Date", "Trimestre") combination, then both sums go through
    :func:`round_half_point`.

    :param df: the original marks (with a "Mark" column)
    :returns: DataFrame with one row per combination, "Question" set to
        "Total" and "Niveau" set to 0

    >>> df = pd.DataFrame({"Eleve": ["E1"] * 3, "Nom": ["N1"] * 3,
    ...                    "Exercice": ["Ex1", "Ex1", "Ex2"],
    ...                    "Date": ["16/09/2016"] * 3, "Trimestre": ["1"] * 3,
    ...                    "Bareme": [1, 1, 2], "Mark": [1, 0.33, 1.5]})
    >>> exo = compute_exo_marks(df)
    >>> exo["Exercice"].tolist()
    ['Ex1', 'Ex2']
    >>> exo["Mark"].tolist()
    [1.5, 1.5]
    >>> exo["Question"].tolist()
    ['Total', 'Total']
    """
    exo_pt = pd.pivot_table(
        df,
        index=["Eleve", "Nom", "Exercice", "Date", "Trimestre"],
        values=["Bareme", "Mark"],
        # The string form avoids the deprecation warning recent pandas emits
        # when it is handed ``np.sum``.
        aggfunc="sum",
    )
    # Element-wise rounding, column by column (``DataFrame.applymap`` is
    # deprecated in recent pandas).
    exo = exo_pt.apply(lambda col: col.map(round_half_point)).reset_index()
    exo["Question"] = "Total"
    exo["Niveau"] = 0
    return exo


def compute_eval_marks(df):
    """Compute Nom level marks from exercice totals.

    For every distinct "Nom", "Bareme" and "Mark" are summed per ("Eleve",
    "Nom", "Trimestre"). When the rows of a "Nom" share a single date that
    date is kept; when they have several dates (or the date is already
    "Trimestre") the rows with a null "Mark" are dropped first and the date
    is set to "Trimestre".

    :param df: DataFrame with "Eleve", "Nom", "Trimestre", "Date", "Bareme"
        and "Mark" columns (typically the result of
        :func:`compute_exo_marks`)
    :returns: DataFrame with evaluation marks; the per-evaluation row number
        is kept in an "index" column

    >>> exo = pd.DataFrame({"Eleve": ["E1", "E1"], "Nom": ["N1", "N1"],
    ...                     "Date": ["16/09/2016"] * 2,
    ...                     "Trimestre": ["1", "1"],
    ...                     "Bareme": [2.0, 4.0], "Mark": [1.5, 3.5]})
    >>> ev = compute_eval_marks(exo)
    >>> ev["Mark"].tolist()
    [5.0]
    >>> ev["Date"].tolist()
    ['16/09/2016']
    """
    frames = []
    for eval_name in df["Nom"].unique():
        logger.debug("Compute marks for %s", eval_name)
        eval_df = df[df["Nom"] == eval_name]
        if eval_df.empty:
            # A missing "Nom" never compares equal to itself: nothing to sum.
            continue

        dates = eval_df["Date"].unique()
        logger.debug("Find those dates: %s", dates)
        if len(dates) > 1 or dates[0] == "Trimestre":
            # For work spread over a period, missing marks must not
            # penalise the student: drop them.
            eval_df = eval_df.dropna(subset=["Mark"])
            date_label = "Trimestre"
        else:
            date_label = dates[0]

        eval_pt = pd.pivot_table(
            eval_df,
            index=["Eleve", "Nom", "Trimestre"],
            values=["Bareme", "Mark", "Date"],
            aggfunc={
                "Bareme": "sum",
                "Mark": "sum",
                # Every group of this evaluation gets the same label; the
                # lambda is consumed before ``date_label`` is rebound.
                "Date": lambda _: date_label,
            },
        )
        frames.append(eval_pt.reset_index())

    # Single concatenation instead of growing a frame inside the loop.
    eval_m = pd.concat(frames) if frames else pd.DataFrame()
    return eval_m.reset_index()


def digest_flat_df(flat_df):
    r"""Compute necessary elements to make a flat df usable for analysis.

    Rows whose "Note" is the string "nn" (not graded) are removed, then the
    "Mark", "Level", "Latex_rep", "Normalized" and "Uniq_quest" columns are
    added. ``flat_df`` itself is left untouched.

    :param flat_df: DataFrame with "Eleve", "Nom", "Exercice", "Question",
        "Date", "Trimestre", "Bareme", "Niveau" and "Note" columns
    :returns: tuple ``(quest_df, exo_df, eval_df)``: the enriched question
        rows, the exercice totals (:func:`compute_exo_marks`) and the
        evaluation totals (:func:`compute_eval_marks`), the last two with a
        "Normalized" column
    """
    # Remove data with "nn" (not graded); filter first so that only the
    # rows that are kept get copied.
    graded = flat_df["Note"].astype("object") != "nn"
    df = flat_df[graded].copy()

    df["Mark"] = compute_marks(df)
    df["Level"] = compute_level(df)
    df["Latex_rep"] = compute_latex_rep(df)
    df["Normalized"] = compute_normalized(df)
    df["Uniq_quest"] = compute_question_description(df)

    exo_df = compute_exo_marks(df)
    exo_df["Normalized"] = compute_normalized(exo_df)

    eval_df = compute_eval_marks(exo_df)
    eval_df["Normalized"] = compute_normalized(eval_df)

    return df, exo_df, eval_df


# -----------------------------
# Settings for 'vim'
# vim:set autoindent expandtab tabstop=4 shiftwidth=4:
# cursor: 16 del