diff --git a/notes_tools/tools/df_marks_manip.py b/notes_tools/tools/df_marks_manip.py
index 64f0967..0d79b5e 100644
--- a/notes_tools/tools/df_marks_manip.py
+++ b/notes_tools/tools/df_marks_manip.py
@@ -3,7 +3,7 @@
 
 import pandas as pd
 import numpy as np
-from math import ceil
+from math import ceil, floor
 import logging
 
 logger = logging.getLogger(__name__)
@@ -47,7 +47,7 @@ def note_to_rep(x):
     return x["Note"]
 
 def note_to_mark(x):
-    """ Compute the mark when it is a "Nivea" note
+    """ Compute the mark when it is a "Niveau" note
 
     :param x: dictionnary with "Niveau", "Note" and "Bareme" keys
 
@@ -72,6 +72,46 @@ def note_to_mark(x):
         return x["Note"] * x["Bareme"] / 3
     return x["Note"]
 
+def note_to_level(x):
+    """ Compute the level ("na",0,1,2,3).
+
+    "na" corresponds to "no answer"
+
+    :param x: dictionary with "Niveau", "Note" and "Bareme" keys
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ... "Trimestre": ["1"]*12,
+    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ... "Note":[1, 0.33, np.nan, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
+    ... }
+    >>> df = pd.DataFrame(d)
+    >>> note_to_level(df.loc[0])
+    3
+    >>> note_to_level(df.loc[1])
+    1
+    >>> note_to_level(df.loc[2])
+    'na'
+    >>> note_to_level(df.loc[3])
+    3
+    >>> note_to_level(df.loc[5])
+    3
+    >>> note_to_level(df.loc[10])
+    2
+    """
+
+    if pd.isnull(x["Note"]):
+        return "na"
+
+    if x["Niveau"]:
+        return int(x["Note"])
+    else:
+        return int(ceil(x["Note"] / x["Bareme"] * 3))
+
 def question_uniq_formater(row):
     """ Create a kind of unique description of the question
 
@@ -154,6 +194,39 @@ def compute_marks(df):
     """
     return df[["Note", "Niveau", "Bareme"]].apply(note_to_mark, axis=1)
 
+def compute_level(df):
+    """ Compute the Level ("na",0,1,2,3) of each row of df
+
+    :param df: DataFrame with "Note", "Niveau" and "Bareme" columns.
+
+    >>> d = {"Eleve":["E1"]*6 + ["E2"]*6,
+    ... "Nom": ["N1"]*4+["N2"]*2 + ["N1"]*4+["N2"]*2,
+    ... "Exercice":["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"] + ["Ex1"]*2+["Ex2"]*2+["Ex1"]+["Ex2"],
+    ... "Question":["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"] + ["Q1"]+["Q2"]+["Q1"]+["Q2"]+["Q1"]+["Q1"],
+    ... "Date":["16/09/2016"]*4+["01/10/2016"]*2 + ["16/09/2016"]*4+["01/10/2016"]*2,
+    ... "Trimestre": ["1"]*12,
+    ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
+    ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
+    ... "Note":[np.nan, 0.33, 2, 1.5, 1, 3, 0.666, 1, 1.5, 1, 2, 3],
+    ... }
+    >>> df = pd.DataFrame(d)
+    >>> compute_level(df)
+    0     na
+    1      1
+    2      3
+    3      3
+    4      1
+    5      3
+    6      2
+    7      3
+    8      3
+    9      2
+    10     2
+    11     3
+    dtype: object
+    """
+    return df[["Note", "Niveau", "Bareme"]].apply(note_to_level, axis=1)
+
 def compute_latex_rep(df):
     """ Add Latex_rep column to df
 
@@ -340,24 +413,24 @@ def digest_flat_df(flat_df):
     ... "Trimestre": ["1"]*12,
     ... "Bareme":[1]*2+[2]*2+[2]*2 + [1]*2+[2]*2+[2]*2,
     ... "Niveau":[0]*4+[1]*2 + [0]*4+[1]*2,
-    ... "Note":[1, 0.33, 2, 1.5, 1, 3, nan, 0, 0, nan, nan, nan],
+    ... "Note":[1, 0.33, 2, 1.5, 1, 3, np.nan, 0, 0, np.nan, np.nan, np.nan],
     ... }
     >>> df = pd.DataFrame(d)
     >>> quest_df, exo_df, eval_df = digest_flat_df(df)
-    >>> quest_df[['Eleve', "Nom", "Mark", "Latex_rep", "Normalized", "Uniq_quest"]]
-       Eleve Nom  Mark Latex_rep  Normalized Uniq_quest
-    0     E1  N1  1.00         1        1.00     Ex1 Q1
-    1     E1  N1  0.33      0.33        0.33     Ex1 Q2
-    2     E1  N1  2.00         2        1.00     Ex2 Q1
-    3     E1  N1  1.50       1.5        0.75     Ex2 Q2
-    4     E1  N2  0.67     \RepU        0.33     Ex1 Q1
-    5     E1  N2  2.00     \RepT        1.00     Ex2 Q1
-    6     E2  N1   NaN        ??         NaN     Ex1 Q1
-    7     E2  N1  0.00         0        0.00     Ex1 Q2
-    8     E2  N1  0.00         0        0.00     Ex2 Q1
-    9     E2  N1   NaN        ??         NaN     Ex2 Q2
-    10    E2  N2   NaN    \NoRep         NaN     Ex1 Q1
-    11    E2  N2   NaN    \NoRep         NaN     Ex2 Q1
+    >>> quest_df[['Eleve', "Nom", "Mark", "Latex_rep", "Normalized", "Uniq_quest", "Level"]]
+       Eleve Nom  Mark Latex_rep  Normalized Uniq_quest Level
+    0     E1  N1  1.00         1        1.00     Ex1 Q1     3
+    1     E1  N1  0.33      0.33        0.33     Ex1 Q2     1
+    2     E1  N1  2.00         2        1.00     Ex2 Q1     3
+    3     E1  N1  1.50       1.5        0.75     Ex2 Q2     3
+    4     E1  N2  0.67     \RepU        0.33     Ex1 Q1     1
+    5     E1  N2  2.00     \RepT        1.00     Ex2 Q1     3
+    6     E2  N1   NaN        ??         NaN     Ex1 Q1    na
+    7     E2  N1  0.00         0        0.00     Ex1 Q2     0
+    8     E2  N1  0.00         0        0.00     Ex2 Q1     0
+    9     E2  N1   NaN        ??         NaN     Ex2 Q2    na
+    10    E2  N2   NaN    \NoRep         NaN     Ex1 Q1    na
+    11    E2  N2   NaN    \NoRep         NaN     Ex2 Q1    na
     >>> exo_df[['Eleve', "Nom", "Exercice", "Mark", "Normalized"]]
       Eleve Nom Exercice  Mark  Normalized
     0    E1  N1      Ex1   1.5        0.75
@@ -376,8 +449,9 @@ def digest_flat_df(flat_df):
     3 1 E2 N2 1 4.0 01/10/2016 NaN NaN
     """
     # Remove data with "nn" (non notés)
-    df = flat_df.copy()[flat_df["Note"] != "nn"]
+    df = flat_df.copy()[flat_df["Note"].astype("object") != "nn"]
     df["Mark"] = compute_marks(df)
+    df["Level"] = compute_level(df)
     df["Latex_rep"] = compute_latex_rep(df)
     df["Normalized"] = compute_normalized(df)
     df["Uniq_quest"] = compute_question_description(df)